In [None]:
#This script is for YOLOv8 object detection using the ultralytics library.
#It loads a pre-trained YOLOv8 model and performs inference on an input image.
#It requires the ultralytics library to be installed.
#Converts YOLO bounding boxes from (xcenter, ycenter, width, height) to COCO format (xmin, ymin, width, height).
from ultralytics import YOLO
from ultralytics.data.augment import LetterBox
from PIL import Image
import cv2
import numpy as np


def load_yolo_model(model_path="yolov8.pt", confidence_threshold=0.5, iou_threshold=0.6):
    """
    Builds a YOLO model and attaches the inference thresholds to it.

    The thresholds are stored as plain attributes (``conf`` and ``iou``) on the
    returned object; the prediction helpers in this notebook read them back and
    forward them to ``model.predict``.

    :param model_path: Weights file handed to ``YOLO(...)``
    :param confidence_threshold: Value stored as ``model.conf``
    :param iou_threshold: Value stored as ``model.iou`` (IoU threshold for NMS)
    :return: The YOLO model instance with ``conf`` and ``iou`` set
    """
    detector = YOLO(model_path)
    thresholds = (("conf", confidence_threshold), ("iou", iou_threshold))
    for attr_name, attr_value in thresholds:
        setattr(detector, attr_name, attr_value)
    return detector


def yolo_predict(model, image_path, img_size=640):
    """
    Reads an image from disk and runs YOLO detection on it.

    :param model: YOLO model instance exposing ``conf`` and ``iou`` attributes
                  (as set by ``load_yolo_model``)
    :param image_path: Path of the image file, read with ``cv2.imread``
    :param img_size: Inference image size forwarded to ``model.predict`` (default is 640)
    :return: List of predictions in COCO format (see ``yolo_predict_from_image``)
    :raises FileNotFoundError: If the image cannot be read from ``image_path``
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        # Ultralytics does not reject a None source (it falls back to its
        # default sample source), so without this check an unreadable file
        # would silently yield detections for the wrong picture.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    return yolo_predict_from_image(model, image, img_size)


def yolo_predict_from_image(model, image, img_size=640):
    """
    Predicts the bounding boxes, class ids, and confidence scores using the YOLO model.

    :param model: YOLO model instance exposing ``conf`` and ``iou`` attributes
    :param image: Input image as a numpy array (``yolo_predict`` passes the
                  array returned by ``cv2.imread``)
    :param img_size: Desired input image size (default is 640)
    :return: List of dicts with keys ``bbox`` ([xmin, ymin, width, height]),
             ``score`` (float) and ``category_id`` (int)
    :raises ValueError: If ``image`` is None
    """
    if image is None:
        # Guard against the silent "default source" fallback described in yolo_predict.
        raise ValueError("image must not be None")

    # Only the first result is used: a single image produces a single result.
    results = model.predict(image, imgsz=img_size, conf=model.conf, iou=model.iou)[0]

    predictions = []
    for box in results.boxes:
        # YOLO reports (x_center, y_center, width, height); COCO wants the top-left corner.
        x_center, y_center, width, height = box.xywh[0].tolist()
        predictions.append({
            "bbox": [x_center - width / 2, y_center - height / 2, width, height],  # COCO format
            "score": float(box.conf[0]),
            "category_id": int(box.cls[0])
        })

    return predictions


In [4]:
# This script is for Detectron2 object detection using the detectron2 library.
# It loads a pre-trained Detectron2 model and performs inference on an input image.
# It requires the detectron2 library to be installed.
# Converts Detectron2 bounding boxes from (x1, y1, x2, y2) to COCO format (x1, y1, width, height).

import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

def load_detectron2_model(config_path="COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml",
                          num_classes=2, weights_path=None, score_thresh=0.5):
    """
    Builds a Detectron2 ``DefaultPredictor`` from a model-zoo config.

    :param config_path: Model-zoo config name, resolved with ``model_zoo.get_config_file``
    :param num_classes: Value written to ``cfg.MODEL.ROI_HEADS.NUM_CLASSES``
    :param weights_path: Checkpoint to load; when falsy, the model-zoo checkpoint URL
                         for ``config_path`` is used instead
    :param score_thresh: Value written to ``cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST``
    :return: ``DefaultPredictor`` built from the assembled config

    NOTE(review): when ``weights_path`` is omitted the zoo checkpoint is loaded while
    NUM_CLASSES is still ``num_classes`` (default 2) -- confirm the checkpoint's
    classification head actually matches that class count.
    """
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(config_path))

    # Both ROI-head settings live on the same config node.
    roi_heads = cfg.MODEL.ROI_HEADS
    roi_heads.NUM_CLASSES = num_classes
    roi_heads.SCORE_THRESH_TEST = score_thresh

    # Explicit weights win; otherwise fall back to the zoo checkpoint for this config.
    cfg.MODEL.WEIGHTS = weights_path or model_zoo.get_checkpoint_url(config_path)

    return DefaultPredictor(cfg)

def detectron2_predict(predictor, image_path, class_names=None):
    """
    Reads an image from disk and runs Detectron2 detection on it.

    :param predictor: Detectron2 predictor instance (callable on an image array)
    :param image_path: Path of the image file, read with ``cv2.imread``
    :param class_names: List of class names (optional, used for label formatting)
    :return: List of predictions in COCO format (see ``detectron2_predict_from_image``)
    :raises FileNotFoundError: If the image cannot be read from ``image_path``
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None on failure; fail here with a clear message
        # instead of an obscure error from inside the predictor.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    return detectron2_predict_from_image(predictor, img, class_names)


def detectron2_predict_from_image(predictor, image, class_names=None):
    """
    Predicts the bounding boxes, class ids, and confidence scores using the Detectron2 model.

    :param predictor: Detectron2 predictor instance
    :param image: Input image as a numpy array (in BGR format)
    :param class_names: List of class names (optional, used for label formatting)
    :return: List of dicts with keys ``bbox`` ([x, y, width, height] as Python floats),
             ``score`` (float) and ``category_id`` -- the integer class id, or the
             matching entry of ``class_names`` when that list is given and non-empty
    :raises ValueError: If ``image`` is None
    """
    if image is None:
        raise ValueError("image must not be None")

    # Run prediction using the Detectron2 model
    instances = predictor(image)["instances"].to("cpu")

    # .tolist() converts numpy scalars to plain Python numbers so the resulting
    # dicts can be passed to json.dump (numpy.float32 is not JSON serializable).
    boxes_xyxy = instances.pred_boxes.tensor.numpy().tolist()
    class_ids = instances.pred_classes.numpy().tolist()
    scores = instances.scores.numpy().tolist()

    predictions = []

    # Process each predicted bounding box
    for (x1, y1, x2, y2), cls_id, score in zip(boxes_xyxy, class_ids, scores):
        cls_id = int(cls_id)
        label = class_names[cls_id] if class_names else cls_id

        predictions.append({
            "bbox": [x1, y1, x2 - x1, y2 - y1],  # COCO format (x, y, width, height)
            "score": float(score),
            "category_id": label
        })

    return predictions

In [1]:
import os
import glob
import json
from tqdm import tqdm
from PIL import Image
from ensemble_boxes import weighted_boxes_fusion

def _coco_to_normalized_xyxy(preds, img_w, img_h):
    """Splits COCO-style predictions into WBF inputs: normalized [x1, y1, x2, y2] boxes, scores, labels."""
    boxes, scores, labels = [], [], []
    for pred in preds:
        # COCO format: [xmin, ymin, width, height] in pixels
        xmin, ymin, width, height = pred["bbox"]

        # Convert to corner format and normalize by the image size
        corners = [
            xmin / img_w,
            ymin / img_h,
            (xmin + width) / img_w,
            (ymin + height) / img_h,
        ]
        # Clamp to [0, 1]: float round-off in upstream conversions can push a
        # coordinate marginally outside the image.
        boxes.append([min(max(value, 0.0), 1.0) for value in corners])
        scores.append(pred["score"])
        labels.append(pred["category_id"])
    return boxes, scores, labels


def process_folder(folder_path, yolo_model, detectron_model, iou_thresh=0.55,
                   output_json="fused_predictions.json", patterns=("*.jpeg",),
                   skip_box_thr=0.0001):
    """
    Runs YOLO and Detectron2 on every matching image in a folder, fuses their
    detections with Weighted Boxes Fusion and writes the result as a JSON list.

    :param folder_path: Folder that is searched (non-recursively) for images
    :param yolo_model: Model passed to ``yolo_predict``
    :param detectron_model: Predictor passed to ``detectron2_predict``
    :param iou_thresh: ``iou_thr`` forwarded to ``weighted_boxes_fusion``
    :param output_json: Path of the JSON file to write
    :param patterns: Glob patterns of the image files to process (default: ``*.jpeg`` only)
    :param skip_box_thr: ``skip_box_thr`` forwarded to ``weighted_boxes_fusion``
    :return: None; each written entry has ``image_id`` (file name without extension),
             ``category_id`` (int), ``bbox`` ([x, y, width, height] in pixels) and ``score``
    """
    # Sorted and de-duplicated so the output order is reproducible between runs.
    image_paths = sorted({
        path
        for pattern in patterns
        for path in glob.glob(os.path.join(folder_path, pattern))
    })
    coco_predictions = []

    for image_path in tqdm(image_paths, desc="Processing images"):
        # Only the size is needed; the context manager releases the file handle right away.
        with Image.open(image_path) as image:
            img_w, img_h = image.size
        image_id = os.path.splitext(os.path.basename(image_path))[0]

        # Run predictions
        yolo_preds = yolo_predict(yolo_model, image_path, 640)
        detectron_preds = detectron2_predict(detectron_model, image_path)

        # Prepare for WBF
        yolo_boxes, yolo_scores, yolo_labels = _coco_to_normalized_xyxy(yolo_preds, img_w, img_h)
        d2_boxes, d2_scores, d2_labels = _coco_to_normalized_xyxy(detectron_preds, img_w, img_h)

        all_boxes = [yolo_boxes, d2_boxes]
        all_scores = [yolo_scores, d2_scores]
        all_labels = [yolo_labels, d2_labels]

        if not any(all_boxes):
            continue  # Neither model detected anything in this image

        # Weighted Box Fusion
        boxes_fused, scores_fused, labels_fused = weighted_boxes_fusion(
            all_boxes, all_scores, all_labels, iou_thr=iou_thresh, skip_box_thr=skip_box_thr
        )

        for box, score, label in zip(boxes_fused, scores_fused, labels_fused):
            # Back from normalized corners to pixel [x, y, width, height]
            x1 = box[0] * img_w
            y1 = box[1] * img_h
            x2 = box[2] * img_w
            y2 = box[3] * img_h

            coco_predictions.append({
                "image_id": image_id,
                "category_id": int(label),
                "bbox": [x1, y1, x2 - x1, y2 - y1],
                "score": float(score)
            })

    with open(output_json, 'w', encoding="utf-8") as f:
        json.dump(coco_predictions, f, indent=4)

    print(f"\n✅ COCO-style prediction results saved to {output_json}")


In [None]:
# Validation images to run the ensemble on (absolute, machine-specific path).
image_folder = r"D:\BTXRD-Dataset\BTXRD-Yolo\val\images"

# Both detectors are loaded with a 0.01 score threshold -- presumably so that
# low-confidence boxes still reach the fusion step; confirm before changing.
yolo = load_yolo_model(
    model_path="./runs/detect/paper-data/weights/paper-best.pt",
    confidence_threshold=0.01,
    iou_threshold=0.6,
)
detectron = load_detectron2_model(
    config_path="COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml",
    num_classes=2,
    weights_path="./detectron_runs/model_best_6280.pth",
    score_thresh=0.01,
)

# Fuse the two models' detections and write them to fused_output.json.
process_folder(
    image_folder,
    yolo,
    detectron,
    iou_thresh=0.5,
    output_json="fused_output.json",
)