In [5]:
print(annotations.keys())  # Check top-level keys
print(annotations['annotations'][0])  # Check the first annotation


dict_keys(['info', 'licenses', 'categories', 'annotations'])
{'image_name': 'frame_20190829091111_x_0001973.jpg', 'image_width:': 1920.0, 'image_height': 1080.0, 'platform': 'Parrot Bebop 2', 'time': {'year': 2019, 'month': 8, 'day': 29, 'hour': 9, 'min': 11, 'sec': 11, 'ms': 394400.0}, 'longtitude': 10.18798203255313, 'latitude': 56.20630134795274, 'altitude': 19921.6, 'linear_x': 0.03130074199289083, 'linear_y': 0.028357808757573367, 'linear_z': 0.0744575835764408, 'angle_phi': -0.06713105738162994, 'angle_theta': 0.06894744634628296, 'angle_psi': 1.1161083340644837, 'bbox': [{'top': 163, 'left': 1098, 'height': 185, 'width': 420, 'class': 1}, {'top': 421, 'left': 1128, 'height': 176, 'width': 393, 'class': 1}, {'top': 927, 'left': 1703, 'height': 153, 'width': 183, 'class': 0}]}


 YOLOS-Tiny Object Detection Script (Train + Evaluate)

In [18]:
import wandb
from transformers import YolosImageProcessor, YolosForObjectDetection
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torch
import json
import os
from tqdm import tqdm
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import numpy as np

# 🔧 Paths
root_dir = r"C:\Users\nesil.bor\Desktop\Folders\master\DI725\DI725_Assignment2_2030336\data\auair2019"
annotation_path = os.path.join(root_dir, "annotations.json")
img_dir = r"C:\Users\nesil.bor\Desktop\Folders\master\DI725\DI725_Assignment2_2030336\data\auair2019\images"
device = "cuda" if torch.cuda.is_available() else "cpu"

# 🟣 Init W&B
wandb.init(project="di725-assignment2", name="yolos-tiny-train")

# ⚙️ Load model + processor
processor = YolosImageProcessor.from_pretrained("hustvl/yolos-tiny")
model = YolosForObjectDetection.from_pretrained("hustvl/yolos-tiny")
model.to(device)

# ⚙️ Custom Dataset
class AUAIRDataset(Dataset):
    def __init__(self, annotations, img_dir, processor, split="train"):
        self.annotations = annotations["annotations"]
        self.images = annotations["images"]
        self.img_dir = img_dir
        self.processor = processor
        # Map image_id to annotations for efficient lookup
        self.ann_by_image_id = {}
        for ann in self.annotations:
            img_id = ann["image_id"]
            if img_id not in self.ann_by_image_id:
                self.ann_by_image_id[img_id] = []
            self.ann_by_image_id[img_id].append(ann)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_info = self.images[idx]
        img_id = img_info["id"]
        img_path = os.path.join(self.img_dir, img_info["file_name"])
        
        try:
            image = Image.open(img_path).convert("RGB")
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            return None

        # Get annotations for this image
        anns = self.ann_by_image_id.get(img_id, [])
        boxes = []
        labels = []
        for ann in anns:
            # COCO-style bbox: [x, y, width, height]
            x, y, w, h = ann["bbox"]
            # Convert to [x_min, y_min, x_max, y_max] for YOLOS
            boxes.append([x, y, x + w, y + h])
            labels.append(ann["category_id"])

        # Convert to tensors
        target = {
            "boxes": torch.tensor(boxes, dtype=torch.float32) if boxes else torch.empty((0, 4), dtype=torch.float32),
            "labels": torch.tensor(labels, dtype=torch.long) if labels else torch.empty((0,), dtype=torch.long),
            "image_id": torch.tensor([img_id]),
        }

        # Process image and annotations
        # Include "area" and "iscrowd" for COCO compatibility
        processor_annotations = [
            {
                "bbox": [x, y, w, h],  # COCO format [x, y, width, height]
                "category_id": l,
                "area": float(w * h),  # Compute area
                "iscrowd": 0  # Default to 0 (no crowd)
            }
            for (x, y, w, h), l in zip((ann["bbox"] for ann in anns), labels)
        ]
        encoding = self.processor(
            images=image,
            annotations={"image_id": img_id, "annotations": processor_annotations},
            return_tensors="pt"
        )
        encoding["pixel_values"] = encoding["pixel_values"].squeeze(0)  # Remove batch dimension
        encoding["labels"] = target

        return encoding, image, img_id, img_info["file_name"]

# 📂 Load annotations
with open(annotation_path) as f:
    raw_annotations = json.load(f)

# Create pseudo-COCO format
image_map = {}  # Map image_id to image metadata
coco_annotations = []  # COCO-style annotations
for idx, ann in enumerate(raw_annotations["annotations"]):
    img_name = ann["image_name"]
    img_id = idx + 1  # Assign unique image_id (1-based indexing)
    image_map[img_id] = {
        "file_name": img_name,
        "width": ann["image_width:"],
        "height": ann["image_height"],
    }
    # Convert bbox list to COCO-style annotations
    for bbox in ann["bbox"]:
        coco_annotations.append({
            "image_id": img_id,
            "category_id": bbox["class"],
            "bbox": [bbox["left"], bbox["top"], bbox["width"], bbox["height"]],
            "area": bbox["width"] * bbox["height"],
            "id": len(coco_annotations) + 1,  # Unique annotation ID
        })

# Create pseudo-COCO structure
annotations = {
    "images": [{"id": img_id, "file_name": img["file_name"], "width": img["width"], "height": img["height"]} for img_id, img in image_map.items()],
    "annotations": coco_annotations,
    "categories": [{"id": i, "name": name} for i, name in enumerate(raw_annotations["categories"])],
}

# Split dataset (80% train, 20% val)
np.random.seed(42)
img_ids = [img["id"] for img in annotations["images"]]
np.random.shuffle(img_ids)
train_size = int(0.8 * len(img_ids))
train_ids = img_ids[:train_size]
val_ids = img_ids[train_size:]

train_images = [img for img in annotations["images"] if img["id"] in train_ids]
val_images = [img for img in annotations["images"] if img["id"] in val_ids]
train_annotations = {
    "images": train_images,
    "annotations": [ann for ann in annotations["annotations"] if ann["image_id"] in train_ids],
    "categories": annotations["categories"],
}
val_annotations = {
    "images": val_images,
    "annotations": [ann for ann in annotations["annotations"] if ann["image_id"] in val_ids],
    "categories": annotations["categories"],
}

train_dataset = AUAIRDataset(train_annotations, img_dir, processor, split="train")
val_dataset = AUAIRDataset(val_annotations, img_dir, processor, split="val")

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0, collate_fn=lambda x: [xi for xi in x if xi is not None])
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=0, collate_fn=lambda x: [xi for xi in x if xi is not None])

# 🧠 Training Loop
def train_model(model, train_loader, val_loader, num_epochs=10, lr=5e-5):
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            optimizer.zero_grad()
            pixel_values = torch.stack([item[0]["pixel_values"] for item in batch]).to(device)
            labels = [item[0]["labels"] for item in batch]

            outputs = model(pixel_values=pixel_values, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        avg_train_loss = train_loss / len(train_loader)
        wandb.log({"epoch": epoch, "train_loss": avg_train_loss})

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in tqdm(val_loader, desc="Validation"):
                pixel_values = torch.stack([item[0]["pixel_values"] for item in batch]).to(device)
                labels = [item[0]["labels"] for item in batch]
                outputs = model(pixel_values=pixel_values, labels=labels)
                val_loss += outputs.loss.item()

        avg_val_loss = val_loss / len(val_loader)
        wandb.log({"epoch": epoch, "val_loss": avg_val_loss})
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

        scheduler.step()

    # Save the trained model
    model.save_pretrained("yolos-tiny-finetuned")
    processor.save_pretrained("yolos-tiny-finetuned")

# 🧠 Inference
def run_yolos_inference(model, dataset, output_path="yolos_pred.json", log_images=False):
    model.eval()
    results = []

    for idx in tqdm(range(len(dataset))):
        inputs, image, img_id, image_name = dataset[idx]
        if inputs is None:
            continue
        inputs = {k: v.unsqueeze(0).to(device) for k, v in inputs.items() if k == "pixel_values"}

        with torch.no_grad():
            outputs = model(**inputs)

        width, height = image.size
        target_sizes = torch.tensor([[height, width]]).to(device)
        
        result = processor.post_process_object_detection(
            outputs,
            target_sizes=target_sizes,
            threshold=0.5
        )[0]

        if log_images and idx % 50 == 0 and len(result["boxes"]) > 0:
            boxes = result["boxes"].cpu().tolist()
            scores = result["scores"].cpu().tolist()
            labels = result["labels"].cpu().tolist()

            wandb.log({
                "prediction": wandb.Image(image, boxes={
                    "predictions": {
                        "box_data": [
                            {
                                "position": {
                                    "minX": b[0] / width,
                                    "minY": b[1] / height,
                                    "maxX": b[2] / width,
                                    "maxY": b[3] / height,
                                },
                                "score": s,
                                "class_id": l
                            }
                            for b, s, l in zip(boxes, scores, labels)
                        ],
                        "class_labels": {i: name for i, name in enumerate(raw_annotations["categories"])}
                    }
                }),
                "step": idx
            })

        for box, label, score in zip(result["boxes"], result["labels"], result["scores"]):
            xmin, ymin, xmax, ymax = box
            results.append({
                "image_id": img_id,
                "category_id": int(label),
                "bbox": [float(xmin), float(ymin), float(xmax - xmin), float(ymax - ymin)],
                "score": float(score)
            })

    with open(output_path, "w") as f:
        json.dump(results, f)
    return output_path

# 📊 mAP Evaluation
def evaluate_map(gt_path, pred_path):
    coco_gt = COCO(gt_path)
    coco_dt = coco_gt.loadRes(pred_path)
    coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    metrics = {
        "mAP@[0.5:0.95]": coco_eval.stats[0],
        "AP50": coco_eval.stats[1],
        "AP75": coco_eval.stats[2],
        "AP_small": coco_eval.stats[3],
        "AP_medium": coco_eval.stats[4],
        "AP_large": coco_eval.stats[5]
    }

    precisions = coco_eval.eval["precision"]
    cat_ids = coco_gt.getCatIds()
    categories = coco_gt.loadCats(cat_ids)

    print("\n📊 Per-category AP (IoU=0.50:0.95):")
    for idx, cat in enumerate(categories):
        precision = precisions[:, :, idx, 0, 0]
        precision = precision[precision > -1]
        ap = precision.mean() if precision.size > 0 else float("nan")
        metrics[f"AP_{cat['name']}"] = ap
        print(f"  {cat['name']:20s}: {ap:.4f}")

    wandb.log(metrics)
    print("✅ mAP + per-class AP metrics logged to W&B.")
    return metrics

# 🧪 Run Training and Evaluation
# Save ground truth annotations to gt.json
with open("gt.json", "w") as f:
    json.dump(val_annotations, f)

# Train the model
train_model(model, train_loader, val_loader, num_epochs=10, lr=5e-5)

# Run inference on validation set
pred_json = run_yolos_inference(model, val_dataset, output_path="yolos_pred.json", log_images=True)

# Evaluate
evaluate_map("gt.json", pred_json)

Epoch 1/10:   0%|          | 0/6565 [00:00<?, ?it/s]


KeyError: 'class_labels'

In [None]:
import wandb
from transformers import YolosImageProcessor, YolosForObjectDetection
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torch
import json
import os
from tqdm import tqdm
import numpy as np

# 🔧 Paths
root_dir = r"C:\Users\nesil.bor\Desktop\Folders\master\DI725\DI725_Assignment2_2030336\data\auair2019"
annotation_path = os.path.join(root_dir, "annotations.json")
img_dir = r"C:\Users\nesil.bor\Desktop\Folders\master\DI725\DI725_Assignment2_2030336\data\auair2019\images"
device = "cuda" if torch.cuda.is_available() else "cpu"

# 🟣 Init W&B
wandb.init(project="di725-assignment2", name="yolos-tiny-train")

# ⚙️ Load model + processor
processor = YolosImageProcessor.from_pretrained("hustvl/yolos-tiny")
model = YolosForObjectDetection.from_pretrained("hustvl/yolos-tiny")
model.to(device)

# ⚙️ Category ID mapping (AUAIR to COCO)
label_map = {0: 1, 1: 3, 2: 8, 3: 7, 4: 4, 5: 2, 6: 6, 7: 10}  # AUAIR to COCO

# ⚙️ Custom Dataset
class AUAIRDataset(Dataset):
    def __init__(self, annotations, img_dir, processor, split="train"):
        self.annotations = annotations["annotations"]
        self.images = annotations["images"]
        self.img_dir = img_dir
        self.processor = processor
        # Map image_id to annotations for efficient lookup
        self.ann_by_image_id = {}
        for ann in self.annotations:
            img_id = ann["image_id"]
            if img_id not in self.ann_by_image_id:
                self.ann_by_image_id[img_id] = []
            self.ann_by_image_id[img_id].append(ann)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_info = self.images[idx]
        img_id = img_info["id"]
        img_path = os.path.join(self.img_dir, img_info["file_name"])
        
        try:
            image = Image.open(img_path).convert("RGB")
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            return None

        # Get annotations for this image
        anns = self.ann_by_image_id.get(img_id, [])
        boxes = []
        labels = []
        for ann in anns:
            # COCO-style bbox: [x, y, width, height]
            x, y, w, h = ann["bbox"]
            # Convert to [x_min, y_min, x_max, y_max] for YOLOS
            boxes.append([x, y, x + w, y + h])
            labels.append(label_map[ann["category_id"]])  # Map AUAIR to COCO IDs

        # Convert to tensors
        target = {
            "boxes": torch.tensor(boxes, dtype=torch.float32) if boxes else torch.empty((0, 4), dtype=torch.float32),
            "class_labels": torch.tensor(labels, dtype=torch.long) if labels else torch.empty((0,), dtype=torch.long),
            "image_id": torch.tensor([img_id]),
        }

        # Process image and annotations
        # Include "area" and "iscrowd" for COCO compatibility
        processor_annotations = [
            {
                "bbox": [x, y, w, h],  # COCO format [x, y, width, height]
                "category_id": label_map[l],  # Map AUAIR to COCO IDs
                "area": float(w * h),  # Compute area
                "iscrowd": 0  # Default to 0 (no crowd)
            }
            for (x, y, w, h), l in zip((ann["bbox"] for ann in anns), [ann["category_id"] for ann in anns])
        ]
        encoding = self.processor(
            images=image,
            annotations={"image_id": img_id, "annotations": processor_annotations},
            return_tensors="pt"
        )
        encoding["pixel_values"] = encoding["pixel_values"].squeeze(0)  # Remove batch dimension
        encoding["labels"] = target

        return encoding, image, img_id, img_info["file_name"]

# 📂 Load annotations
with open(annotation_path) as f:
    raw_annotations = json.load(f)

# Create pseudo-COCO format
image_map = {}  # Map image_id to image metadata
coco_annotations = []  # COCO-style annotations
for idx, ann in enumerate(raw_annotations["annotations"]):
    img_name = ann["image_name"]
    img_id = idx + 1  # Assign unique image_id (1-based indexing)
    image_map[img_id] = {
        "file_name": img_name,
        "width": ann["image_width:"],
        "height": ann["image_height"],
    }
    # Convert bbox list to COCO-style annotations
    for bbox in ann["bbox"]:
        coco_annotations.append({
            "image_id": img_id,
            "category_id": bbox["class"],
            "bbox": [bbox["left"], bbox["top"], bbox["width"], bbox["height"]],
            "area": bbox["width"] * bbox["height"],
            "id": len(coco_annotations) + 1,  # Unique annotation ID
        })

# Create pseudo-COCO structure
annotations = {
    "images": [{"id": img_id, "file_name": img["file_name"], "width": img["width"], "height": img["height"]} for img_id, img in image_map.items()],
    "annotations": coco_annotations,
    "categories": [{"id": i, "name": name} for i, name in enumerate(raw_annotations["categories"])],
}

# Split dataset (80% train, 20% val)
np.random.seed(42)
img_ids = [img["id"] for img in annotations["images"]]
np.random.shuffle(img_ids)
train_size = int(0.8 * len(img_ids))
train_ids = img_ids[:train_size]
val_ids = img_ids[train_size:]

train_images = [img for img in annotations["images"] if img["id"] in train_ids]
val_images = [img for img in annotations["images"] if img["id"] in val_ids]
train_annotations = {
    "images": train_images,
    "annotations": [ann for ann in annotations["annotations"] if ann["image_id"] in train_ids],
    "categories": annotations["categories"],
}
val_annotations = {
    "images": val_images,
    "annotations": [ann for ann in annotations["annotations"] if ann["image_id"] in val_ids],
    "categories": annotations["categories"],
}

train_dataset = AUAIRDataset(train_annotations, img_dir, processor, split="train")
val_dataset = AUAIRDataset(val_annotations, img_dir, processor, split="val")

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0, collate_fn=lambda x: [xi for xi in x if xi is not None])
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=0, collate_fn=lambda x: [xi for xi in x if xi is not None])

# 🧠 Training Loop
def train_model(model, train_loader, val_loader, num_epochs=10, lr=5e-5):
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            optimizer.zero_grad()
            pixel_values = torch.stack([item[0]["pixel_values"] for item in batch]).to(device)
            labels = [item[0]["labels"] for item in batch]

            # Move all tensors in labels to the correct device
            labels = [
                {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in label.items()}
                for label in labels
            ]

            outputs = model(pixel_values=pixel_values, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        avg_train_loss = train_loss / len(train_loader)
        wandb.log({"epoch": epoch, "train_loss": avg_train_loss})

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in tqdm(val_loader, desc="Validation"):
                pixel_values = torch.stack([item[0]["pixel_values"] for item in batch]).to(device)
                labels = [item[0]["labels"] for item in batch]

                # Move all tensors in labels to the correct device
                labels = [
                    {k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in label.items()}
                    for label in labels
                ]

                outputs = model(pixel_values=pixel_values, labels=labels)
                val_loss += outputs.loss.item()

        avg_val_loss = val_loss / len(val_loader)
        wandb.log({"epoch": epoch, "val_loss": avg_val_loss})
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

        scheduler.step()

    # Save the trained model
    model.save_pretrained("yolos-tiny-finetuned")
    processor.save_pretrained("yolos-tiny-finetuned")

# 🧪 Run Training
# Save ground truth annotations for evaluation
with open("gt.json", "w") as f:
    json.dump(val_annotations, f)

train_model(model, train_loader, val_loader, num_epochs=10, lr=5e-5)

Epoch 1/10:   0%|          | 0/6565 [00:00<?, ?it/s]


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument x2 in method wrapper_CUDA___cdist_forward)

INFERENCE PART

In [None]:
import wandb
from transformers import YolosImageProcessor, YolosForObjectDetection
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torch
import json
import os
from tqdm import tqdm
import numpy as np

# 🔧 Paths
root_dir = r"C:\Users\nesil.bor\Desktop\Folders\master\DI725\DI725_Assignment2_2030336\data\auair2019"
annotation_path = os.path.join(root_dir, "annotations.json")
img_dir = r"C:\Users\nesil.bor\Desktop\Folders\master\DI725\DI725_Assignment2_2030336\data\auair2019\images"
device = "cuda" if torch.cuda.is_available() else "cpu"

# 🟣 Init W&B
wandb.init(project="di725-assignment2", name="yolos-tiny-inference")

# ⚙️ Load model + processor
processor = YolosImageProcessor.from_pretrained("yolos-tiny-finetuned")
model = YolosForObjectDetection.from_pretrained("yolos-tiny-finetuned")
model.to(device)

# ⚙️ Category ID mapping (COCO to AUAIR for evaluation)
reverse_map = {1: 0, 3: 1, 8: 2, 7: 3, 4: 4, 2: 5, 6: 6, 10: 7}  # COCO to AUAIR

# ⚙️ Custom Dataset
class AUAIRDataset(Dataset):
    def __init__(self, annotations, img_dir, processor, split="val"):
        self.annotations = annotations["annotations"]
        self.images = annotations["images"]
        self.img_dir = img_dir
        self.processor = processor
        # Map image_id to annotations for efficient lookup
        self.ann_by_image_id = {}
        for ann in self.annotations:
            img_id = ann["image_id"]
            if img_id not in self.ann_by_image_id:
                self.ann_by_image_id[img_id] = []
            self.ann_by_image_id[img_id].append(ann)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_info = self.images[idx]
        img_id = img_info["id"]
        img_path = os.path.join(self.img_dir, img_info["file_name"])
        
        try:
            image = Image.open(img_path).convert("RGB")
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            return None

        # Get annotations for this image (not used for inference, but kept for compatibility)
        anns = self.ann_by_image_id.get(img_id, [])
        boxes = []
        labels = []
        for ann in anns:
            x, y, w, h = ann["bbox"]
            boxes.append([x, y, x + w, y + h])
            labels.append(ann["category_id"])

        # Convert to tensors (not used for inference, but kept for compatibility)
        target = {
            "boxes": torch.tensor(boxes, dtype=torch.float32) if boxes else torch.empty((0, 4), dtype=torch.float32),
            "class_labels": torch.tensor(labels, dtype=torch.long) if labels else torch.empty((0,), dtype=torch.long),
            "image_id": torch.tensor([img_id]),
        }

        # Process image
        encoding = self.processor(images=image, return_tensors="pt")
        encoding["pixel_values"] = encoding["pixel_values"].squeeze(0)  # Remove batch dimension
        encoding["labels"] = target

        return encoding, image, img_id, img_info["file_name"]

# 📂 Load annotations
with open(annotation_path) as f:
    raw_annotations = json.load(f)

# Create pseudo-COCO format
image_map = {}  # Map image_id to image metadata
coco_annotations = []  # COCO-style annotations
for idx, ann in enumerate(raw_annotations["annotations"]):
    img_name = ann["image_name"]
    img_id = idx + 1  # Assign unique image_id (1-based indexing)
    image_map[img_id] = {
        "file_name": img_name,
        "width": ann["image_width:"],
        "height": ann["image_height"],
    }
    for bbox in ann["bbox"]:
        coco_annotations.append({
            "image_id": img_id,
            "category_id": bbox["class"],
            "bbox": [bbox["left"], bbox["top"], bbox["width"], bbox["height"]],
            "area": bbox["width"] * bbox["height"],
            "id": len(coco_annotations) + 1,
        })

# Create pseudo-COCO structure
annotations = {
    "images": [{"id": img_id, "file_name": img["file_name"], "width": img["width"], "height": img["height"]} for img_id, img in image_map.items()],
    "annotations": coco_annotations,
    "categories": [{"id": i, "name": name} for i, name in enumerate(raw_annotations["categories"])],
}

# Split dataset (use validation set for inference)
np.random.seed(42)
img_ids = [img["id"] for img in annotations["images"]]
np.random.shuffle(img_ids)
train_size = int(0.8 * len(img_ids))
val_ids = img_ids[train_size:]

val_images = [img for img in annotations["images"] if img["id"] in val_ids]
val_annotations = {
    "images": val_images,
    "annotations": [ann for ann in annotations["annotations"] if ann["image_id"] in val_ids],
    "categories": annotations["categories"],
}

val_dataset = AUAIRDataset(val_annotations, img_dir, processor, split="val")

# 🧠 Inference
def run_yolos_inference(model, dataset, output_path="yolos_pred.json", log_images=False):
    model.eval()
    results = []

    for idx in tqdm(range(len(dataset))):
        inputs, image, img_id, image_name = dataset[idx]
        if inputs is None:
            continue
        inputs = {k: v.unsqueeze(0).to(device) for k, v in inputs.items() if k == "pixel_values"}

        with torch.no_grad():
            outputs = model(**inputs)

        width, height = image.size
        target_sizes = torch.tensor([[height, width]]).to(device)
        
        result = processor.post_process_object_detection(
            outputs,
            target_sizes=target_sizes,
            threshold=0.5
        )[0]

        if log_images and idx % 50 == 0 and len(result["boxes"]) > 0:
            boxes = result["boxes"].cpu().tolist()
            scores = result["scores"].cpu().tolist()
            labels = result["labels"].cpu().tolist()

            wandb.log({
                "prediction": wandb.Image(image, boxes={
                    "predictions": {
                        "box_data": [
                            {
                                "position": {
                                    "minX": b[0] / width,
                                    "minY": b[1] / height,
                                    "maxX": b[2] / width,
                                    "maxY": b[3] / height,
                                },
                                "score": s,
                                "class_id": reverse_map.get(l, l)  # Map back to AUAIR IDs for logging
                            }
                            for b, s, l in zip(boxes, scores, labels)
                        ],
                        "class_labels": {i: name for i, name in enumerate(raw_annotations["categories"])}
                    }
                }),
                "step": idx
            })

        for box, label, score in zip(result["boxes"], result["labels"], result["scores"]):
            xmin, ymin, xmax, ymax = box
            results.append({
                "image_id": img_id,
                "category_id": reverse_map.get(int(label), int(label)),  # Map COCO to AUAIR IDs
                "bbox": [float(xmin), float(ymin), float(xmax - xmin), float(ymax - ymin)],
                "score": float(score)
            })

    with open(output_path, "w") as f:
        json.dump(results, f)
    return output_path

# 🧪 Run Inference
pred_json = run_yolos_inference(model, val_dataset, output_path="yolos_pred.json", log_images=True)
print(f"Predictions saved to {pred_json}")

EVALUATION PART 

In [None]:
import wandb
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import json
import numpy as np

# 🔧 Paths
gt_path = "gt.json"
pred_path = "yolos_pred.json"

# 🟣 Init W&B
wandb.init(project="di725-assignment2", name="yolos-tiny-evaluate")

# 📊 mAP Evaluation
def evaluate_map(gt_path, pred_path):
    coco_gt = COCO(gt_path)
    coco_dt = coco_gt.loadRes(pred_path)
    coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    metrics = {
        "mAP@[0.5:0.95]": coco_eval.stats[0],
        "AP50": coco_eval.stats[1],
        "AP75": coco_eval.stats[2],
        "AP_small": coco_eval.stats[3],
        "AP_medium": coco_eval.stats[4],
        "AP_large": coco_eval.stats[5]
    }

    precisions = coco_eval.eval["precision"]
    cat_ids = coco_gt.getCatIds()
    categories = coco_gt.loadCats(cat_ids)

    print("\n📊 Per-category AP (IoU=0.50:0.95):")
    for idx, cat in enumerate(categories):
        precision = precisions[:, :, idx, 0, 0]
        precision = precision[precision > -1]
        ap = precision.mean() if precision.size > 0 else float("nan")
        metrics[f"AP_{cat['name']}"] = ap
        print(f"  {cat['name']:20s}: {ap:.4f}")

    wandb.log(metrics)
    print("✅ mAP + per-class AP metrics logged to W&B.")
    return metrics

# 🧪 Run Evaluation
metrics = evaluate_map(gt_path, pred_path)
print("Evaluation complete.")