In [None]:
from ultralytics import YOLO
from pathlib import Path
import json


# Plain (un-sliced) YOLOv8 inference: saves annotated images and exports the
# detections as a COCO-style results list.
model = YOLO("yolov8m.pt")

image_dir = Path("dataset/images")
output_dir = Path("outputs/normal_yolo")
output_dir.mkdir(parents=True, exist_ok=True)

image_paths = sorted(image_dir.glob("*.jpg"))

# COCO image ids have to come from the ground-truth file. Numbering the images by
# their sorted position is not guaranteed to reproduce the ids stored in the
# annotations, and a mismatch silently scores predictions against another image.
gt_json_path = Path("dataset/annotations/instances_gt_fixed.json")
if gt_json_path.exists():
    with open(gt_json_path, "r") as f:
        image_id_by_name = {
            Path(img["file_name"]).name: img["id"]
            for img in json.load(f)["images"]
        }
else:
    # No annotations available: fall back to 1-based position in sorted order.
    print(f"WARNING: {gt_json_path} not found; falling back to sorted-order image ids")
    image_id_by_name = {p.name: i for i, p in enumerate(image_paths, start=1)}

# Restrict inference to COCO class 2 ("car")
results = model(
    [str(p) for p in image_paths],
    conf=0.15,
    classes=[2],
    device="cpu"
)

for img_path, r in zip(image_paths, results):
    r.save(filename=str(output_dir / img_path.name))

print("Normal YOLOv8 (car-only) predictions saved")

# COCO results JSON: one entry per detection, bbox as [x, y, width, height]
coco_preds = []

for img_path, r in zip(image_paths, results):
    image_id = image_id_by_name.get(img_path.name)
    if image_id is None:
        # An image the ground truth does not list cannot be evaluated.
        print(f"WARNING: {img_path.name} is not listed in {gt_json_path}; not exported")
        continue
    if r.boxes is None:
        continue

    for (x1, y1, x2, y2), score in zip(r.boxes.xyxy.tolist(), r.boxes.conf.tolist()):
        coco_preds.append({
            "image_id": image_id,
            "category_id": 0,
            "bbox": [x1, y1, x2 - x1, y2 - y1],
            "score": float(score)
        })

with open("normal_yolo_preds.json", "w") as f:
    json.dump(coco_preds, f)

print("Normal YOLOv8 predictions JSON saved")



0: 640x640 13 cars, 428.6ms
1: 640x640 11 cars, 428.6ms
2: 640x640 3 cars, 428.6ms
Speed: 5.0ms preprocess, 428.6ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)
Normal YOLOv8 (car-only) predictions saved
Normal YOLOv8 predictions JSON saved


In [None]:
from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction
from pathlib import Path
import json

# Sliced (SAHI) YOLOv8 inference: exports annotated images and a COCO-style
# results list for the same image folder.
detection_model = AutoDetectionModel.from_pretrained(
    model_type="yolov8",
    model_path="yolov8m.pt",
    confidence_threshold=0.15,
    device="cpu"
)

image_dir = Path("dataset/images")
output_dir = Path("outputs/sahi_yolo")
output_dir.mkdir(parents=True, exist_ok=True)

image_paths = sorted(image_dir.glob("*.jpg"))

# COCO image ids have to come from the ground-truth file. Numbering the images by
# their sorted position is not guaranteed to reproduce the ids stored in the
# annotations, and a mismatch silently scores predictions against another image.
gt_json_path = Path("dataset/annotations/instances_gt_fixed.json")
if gt_json_path.exists():
    with open(gt_json_path, "r") as f:
        image_id_by_name = {
            Path(img["file_name"]).name: img["id"]
            for img in json.load(f)["images"]
        }
else:
    # No annotations available: fall back to 1-based position in sorted order.
    print(f"WARNING: {gt_json_path} not found; falling back to sorted-order image ids")
    image_id_by_name = {p.name: i for i, p in enumerate(image_paths, start=1)}

# Every class id in 0..79 except 2 is excluded, leaving only class 2 detections.
EXCLUDE_IDS = [i for i in range(80) if i != 2]

coco_preds = []

for img_path in image_paths:
    result = get_sliced_prediction(
        image=str(img_path),
        detection_model=detection_model,
        slice_height=512,
        slice_width=512,
        overlap_height_ratio=0.25,
        overlap_width_ratio=0.25,
        exclude_classes_by_id=EXCLUDE_IDS
    )

    result.export_visuals(
        export_dir=str(output_dir),
        file_name=img_path.stem
    )

    image_id = image_id_by_name.get(img_path.name)
    if image_id is None:
        # An image the ground truth does not list cannot be evaluated.
        print(f"WARNING: {img_path.name} is not listed in {gt_json_path}; not exported")
        continue

    for obj in result.object_prediction_list:
        coco_preds.append({
            "image_id": image_id,
            "category_id": 0,
            "bbox": obj.bbox.to_xywh(),
            "score": obj.score.value
        })

with open("sahi_yolo_preds.json", "w") as f:
    json.dump(coco_preds, f)

print("YOLOv8 + SAHI (car-only) predictions saved + JSON exported")


Performing prediction on 66 slices.
Performing prediction on 80 slices.
Performing prediction on 80 slices.
YOLOv8 + SAHI (car-only) predictions saved + JSON exported


In [None]:
import json
from collections import defaultdict
from pathlib import Path


# Evaluation inputs, relative to the notebook's working directory. The inference
# cells write their prediction JSON files there with relative paths, so absolute
# per-user paths are unnecessary and break the notebook on any other machine.
GT_JSON_PATH = Path("dataset") / "annotations" / "instances_gt_fixed.json"
YOLO_JSON_PATH = Path("normal_yolo_preds.json")
SAHI_JSON_PATH = Path("sahi_yolo_preds.json")


# Minimum IoU for a prediction to count as matching a ground-truth box.
IOU_THRESHOLD = 0.5



def xywh_to_xyxy(box):
    """Convert an ``[x, y, width, height]`` box into ``[x1, y1, x2, y2]`` corners."""
    left, top, width, height = box
    right = left + width
    bottom = top + height
    return [left, top, right, bottom]

def iou(box1, box2):
    """Intersection-over-union of two ``[x1, y1, x2, y2]`` boxes.

    Returns 0 when the union area is not positive.
    """
    ax1, ay1, ax2, ay2 = box1
    bx1, by1, bx2, by2 = box2

    # Width/height of the overlap rectangle, clamped to 0 for disjoint boxes.
    overlap_w = max(0, min(ax2, bx2) - max(ax1, bx1))
    overlap_h = max(0, min(ay2, by2) - max(ay1, by1))
    inter_area = overlap_w * overlap_h

    area_a = (ax2 - ax1) * (ay2 - ay1)
    area_b = (bx2 - bx1) * (by2 - by1)
    union = area_a + area_b - inter_area

    if union > 0:
        return inter_area / union
    return 0



# Read the ground truth and both prediction files.
gt = json.loads(GT_JSON_PATH.read_text())
yolo_preds = json.loads(YOLO_JSON_PATH.read_text())
sahi_preds = json.loads(SAHI_JSON_PATH.read_text())


# Group every box by image id, converted to corner format for the IoU helper.
gt_by_image = defaultdict(list)
for annotation in gt["annotations"]:
    corners = xywh_to_xyxy(annotation["bbox"])
    gt_by_image[annotation["image_id"]].append(corners)

yolo_by_image = defaultdict(list)
sahi_by_image = defaultdict(list)
for predictions, grouped in ((yolo_preds, yolo_by_image), (sahi_preds, sahi_by_image)):
    for prediction in predictions:
        grouped[prediction["image_id"]].append(xywh_to_xyxy(prediction["bbox"]))



def evaluate(gt_boxes, pred_boxes):
    """Match predictions to ground-truth boxes and score the result.

    Predictions are processed in the order given. Each one is matched to the
    still-unmatched ground-truth box it overlaps most, provided that IoU is at
    least ``IOU_THRESHOLD``. Taking the best overlap rather than the first box
    above the threshold avoids using up a ground-truth box that a later
    prediction fits better. Each ground-truth box is matched at most once.

    Args:
        gt_boxes: ground-truth boxes as ``[x1, y1, x2, y2]``.
        pred_boxes: predicted boxes as ``[x1, y1, x2, y2]``.

    Returns:
        ``(tp, fp, fn, precision, recall)``. Precision is 0 when there are no
        predictions and recall is 0 when there are no ground-truth boxes.
    """
    matched_gt = set()
    tp = 0

    for pb in pred_boxes:
        best_idx = None
        best_iou = 0
        for i, gb in enumerate(gt_boxes):
            if i in matched_gt:
                continue
            overlap = iou(pb, gb)
            # Strict ">" keeps the earliest box on ties.
            if overlap >= IOU_THRESHOLD and (best_idx is None or overlap > best_iou):
                best_idx = i
                best_iou = overlap

        if best_idx is not None:
            tp += 1
            matched_gt.add(best_idx)

    fp = len(pred_boxes) - tp
    fn = len(gt_boxes) - tp

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0

    return tp, fp, fn, precision, recall



# Per-image TP/FP/FN, precision and recall for both detectors.
print("\n================ PER IMAGE ANALYSIS ================\n")

for image_entry in gt["images"]:
    image_key = image_entry["id"]
    truth_boxes = gt_by_image.get(image_key, [])

    print(f"Image: {image_entry['file_name']}")
    print(f"  GT cars: {len(truth_boxes)}")

    for label, grouped in (("Normal YOLO", yolo_by_image), ("YOLO + SAHI", sahi_by_image)):
        tp, fp, fn, precision, recall = evaluate(truth_boxes, grouped.get(image_key, []))

        print(f"  {label}:")
        print(f"    TP={tp}, FP={fp}, FN={fn}")
        print(f"    Precision={precision:.3f}, Recall={recall:.3f}")

    print("-" * 55)

print("\nComparison complete")




Image: car3.jpg
  GT cars: 55
  Normal YOLO:
    TP=0, FP=13, FN=55
    Precision=0.000, Recall=0.000
  YOLO + SAHI:
    TP=0, FP=38, FN=55
    Precision=0.000, Recall=0.000
-------------------------------------------------------
Image: car2.jpg
  GT cars: 22
  Normal YOLO:
    TP=7, FP=4, FN=15
    Precision=0.636, Recall=0.318
  YOLO + SAHI:
    TP=19, FP=15, FN=3
    Precision=0.559, Recall=0.864
-------------------------------------------------------
Image: car1.jpg
  GT cars: 51
  Normal YOLO:
    TP=0, FP=3, FN=51
    Precision=0.000, Recall=0.000
  YOLO + SAHI:
    TP=0, FP=3, FN=51
    Precision=0.000, Recall=0.000
-------------------------------------------------------

✔ Comparison complete


In [None]:
from ultralytics import YOLO
from pathlib import Path
import json


# Plain (un-sliced) RT-DETR inference: saves annotated images and exports the
# detections as a COCO-style results list.
model = YOLO("rtdetr-l.pt")

image_dir = Path("dataset/images")
output_dir = Path("outputs/normal_rtdetr")
output_dir.mkdir(parents=True, exist_ok=True)

image_paths = sorted(image_dir.glob("*.jpg"))

# COCO image ids have to come from the ground-truth file. Numbering the images by
# their sorted position is not guaranteed to reproduce the ids stored in the
# annotations, and a mismatch silently scores predictions against another image.
gt_json_path = Path("dataset/annotations/instances_gt_fixed.json")
if gt_json_path.exists():
    with open(gt_json_path, "r") as f:
        image_id_by_name = {
            Path(img["file_name"]).name: img["id"]
            for img in json.load(f)["images"]
        }
else:
    # No annotations available: fall back to 1-based position in sorted order.
    print(f"WARNING: {gt_json_path} not found; falling back to sorted-order image ids")
    image_id_by_name = {p.name: i for i, p in enumerate(image_paths, start=1)}

# Only class id 2 is requested from the model.
results = model(
    [str(p) for p in image_paths],
    conf=0.15,
    classes=[2],
    device="cpu"
)

for img_path, r in zip(image_paths, results):
    r.save(filename=str(output_dir / img_path.name))

# COCO results JSON: one entry per detection, bbox as [x, y, width, height]
coco_preds = []

for img_path, r in zip(image_paths, results):
    image_id = image_id_by_name.get(img_path.name)
    if image_id is None:
        # An image the ground truth does not list cannot be evaluated.
        print(f"WARNING: {img_path.name} is not listed in {gt_json_path}; not exported")
        continue
    if r.boxes is None:
        continue

    for (x1, y1, x2, y2), score in zip(r.boxes.xyxy.tolist(), r.boxes.conf.tolist()):
        coco_preds.append({
            "image_id": image_id,
            "category_id": 0,
            "bbox": [x1, y1, x2 - x1, y2 - y1],
            "score": float(score)
        })

with open("normal_rtdetr_preds.json", "w") as f:
    json.dump(coco_preds, f)

print("Normal RT-DETR (car-only) saved + JSON exported")


[KDownloading https://github.com/ultralytics/assets/releases/download/v8.4.0/rtdetr-l.pt to 'rtdetr-l.pt': 100% ━━━━━━━━━━━━ 63.4MB 7.7MB/s 8.2s 8.2s<0.1s7s

0: 640x640 38 cars, 2880.9ms
1: 640x640 46 cars, 2880.9ms
2: 640x640 (no detections), 2880.9ms
Speed: 5.9ms preprocess, 2880.9ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 640)
Normal RT-DETR (car-only) saved + JSON exported


In [None]:
from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction
from pathlib import Path
import json

# Sliced (SAHI) RT-DETR inference: exports annotated images and a COCO-style
# results list for the same image folder.
detection_model = AutoDetectionModel.from_pretrained(
    model_type="rtdetr",
    model_path="rtdetr-l.pt",
    confidence_threshold=0.15,
    device="cpu"
)

image_dir = Path("dataset/images")
output_dir = Path("outputs/sahi_rtdetr")
output_dir.mkdir(parents=True, exist_ok=True)

image_paths = sorted(image_dir.glob("*.jpg"))

# COCO image ids have to come from the ground-truth file. Numbering the images by
# their sorted position is not guaranteed to reproduce the ids stored in the
# annotations, and a mismatch silently scores predictions against another image.
gt_json_path = Path("dataset/annotations/instances_gt_fixed.json")
if gt_json_path.exists():
    with open(gt_json_path, "r") as f:
        image_id_by_name = {
            Path(img["file_name"]).name: img["id"]
            for img in json.load(f)["images"]
        }
else:
    # No annotations available: fall back to 1-based position in sorted order.
    print(f"WARNING: {gt_json_path} not found; falling back to sorted-order image ids")
    image_id_by_name = {p.name: i for i, p in enumerate(image_paths, start=1)}

# Every class id in 0..79 except 2 is excluded, leaving only class 2 detections.
EXCLUDE_IDS = [i for i in range(80) if i != 2]

coco_preds = []

for img_path in image_paths:
    result = get_sliced_prediction(
        image=str(img_path),
        detection_model=detection_model,
        slice_height=512,
        slice_width=512,
        overlap_height_ratio=0.25,
        overlap_width_ratio=0.25,
        exclude_classes_by_id=EXCLUDE_IDS
    )

    result.export_visuals(
        export_dir=str(output_dir),
        file_name=img_path.stem
    )

    image_id = image_id_by_name.get(img_path.name)
    if image_id is None:
        # An image the ground truth does not list cannot be evaluated.
        print(f"WARNING: {img_path.name} is not listed in {gt_json_path}; not exported")
        continue

    for obj in result.object_prediction_list:
        coco_preds.append({
            "image_id": image_id,
            "category_id": 0,
            "bbox": obj.bbox.to_xywh(),
            "score": obj.score.value
        })

with open("sahi_rtdetr_preds.json", "w") as f:
    json.dump(coco_preds, f)

print("RT-DETR + SAHI (car-only) saved + JSON exported")


Performing prediction on 66 slices.
Performing prediction on 80 slices.
Performing prediction on 80 slices.
RT-DETR + SAHI (car-only) saved + JSON exported


In [None]:
import json
from collections import defaultdict
from pathlib import Path


# Evaluation inputs, relative to the notebook's working directory. The inference
# cells write their prediction JSON files there with relative paths, so absolute
# per-user paths are unnecessary and break the notebook on any other machine.
GT_JSON_PATH = Path("dataset") / "annotations" / "instances_gt_fixed.json"
PRED1_JSON_PATH = Path("normal_yolo_preds.json")
PRED2_JSON_PATH = Path("sahi_yolo_preds.json")


# Ground-truth image id to analyse, and the minimum IoU for a match.
IMAGE_ID = 2
IOU_THRESHOLD = 0.5



def xywh_to_xyxy(box):
    """Turn a ``[x, y, w, h]`` box into its ``[x1, y1, x2, y2]`` corner form."""
    x_min, y_min, box_w, box_h = box
    x_max, y_max = x_min + box_w, y_min + box_h
    return [x_min, y_min, x_max, y_max]

def iou(box1, box2):
    """Return the IoU of two corner-format boxes, or 0 if their union is empty."""
    left_a, top_a, right_a, bottom_a = box1
    left_b, top_b, right_b, bottom_b = box2

    # Corners of the overlap rectangle (may be inverted when boxes are disjoint).
    inter_left = max(left_a, left_b)
    inter_top = max(top_a, top_b)
    inter_right = min(right_a, right_b)
    inter_bottom = min(bottom_a, bottom_b)

    inter_area = max(0, inter_right - inter_left) * max(0, inter_bottom - inter_top)

    first_area = (right_a - left_a) * (bottom_a - top_a)
    second_area = (right_b - left_b) * (bottom_b - top_b)
    union = first_area + second_area - inter_area

    if not union > 0:
        return 0
    return inter_area / union



# Read the ground truth and the two prediction files being compared.
gt = json.loads(GT_JSON_PATH.read_text())
preds1 = json.loads(PRED1_JSON_PATH.read_text())
preds2 = json.loads(PRED2_JSON_PATH.read_text())


# Keep only the boxes that belong to IMAGE_ID, converted to corner format.
gt_boxes = list(
    map(
        xywh_to_xyxy,
        (entry["bbox"] for entry in gt["annotations"] if entry["image_id"] == IMAGE_ID),
    )
)

pred1_boxes = list(
    map(
        xywh_to_xyxy,
        (entry["bbox"] for entry in preds1 if entry["image_id"] == IMAGE_ID),
    )
)

pred2_boxes = list(
    map(
        xywh_to_xyxy,
        (entry["bbox"] for entry in preds2 if entry["image_id"] == IMAGE_ID),
    )
)



def evaluate(gt_boxes, pred_boxes):
    """Match predictions to ground-truth boxes and score the result.

    Predictions are processed in the order given. Each one is matched to the
    still-unmatched ground-truth box it overlaps most, provided that IoU is at
    least ``IOU_THRESHOLD``. Taking the best overlap rather than the first box
    above the threshold avoids using up a ground-truth box that a later
    prediction fits better. Each ground-truth box is matched at most once.

    Args:
        gt_boxes: ground-truth boxes as ``[x1, y1, x2, y2]``.
        pred_boxes: predicted boxes as ``[x1, y1, x2, y2]``.

    Returns:
        ``(tp, fp, fn, precision, recall)``. Precision is 0 when there are no
        predictions and recall is 0 when there are no ground-truth boxes.
    """
    matched_gt = set()
    tp = 0

    for pb in pred_boxes:
        best_idx = None
        best_iou = 0
        for i, gb in enumerate(gt_boxes):
            if i in matched_gt:
                continue
            overlap = iou(pb, gb)
            # Strict ">" keeps the earliest box on ties.
            if overlap >= IOU_THRESHOLD and (best_idx is None or overlap > best_iou):
                best_idx = i
                best_iou = overlap

        if best_idx is not None:
            tp += 1
            matched_gt.add(best_idx)

    fp = len(pred_boxes) - tp
    fn = len(gt_boxes) - tp

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0

    return tp, fp, fn, precision, recall



# Score both prediction sets against the ground truth of the selected image.
p1_tp, p1_fp, p1_fn, p1_p, p1_r = evaluate(gt_boxes, pred1_boxes)
p2_tp, p2_fp, p2_fn, p2_p, p2_r = evaluate(gt_boxes, pred2_boxes)

# The header and footer are built from IMAGE_ID so the report always names the
# image that was actually analysed.
print(f"\n=========== IMAGE {IMAGE_ID} ANALYSIS ===========\n")
print(f"GT cars: {len(gt_boxes)}\n")

print("Model 1 (Normal):")
print(f"  TP={p1_tp}, FP={p1_fp}, FN={p1_fn}")
print(f"   Recall={p1_r:.3f}\n")

print("Model 2 (With SAHI):")
print(f"  TP={p2_tp}, FP={p2_fp}, FN={p2_fn}")
print(f"   Recall={p2_r:.3f}")

print(f"\n✔ Image {IMAGE_ID} comparison complete")




GT cars: 22

Model 1 (Normal):
  TP=7, FP=4, FN=15
   Recall=0.318

Model 2 (With SAHI):
  TP=19, FP=15, FN=3
   Recall=0.864

✔ Image 2 comparison complete


In [None]:
import json
from collections import defaultdict
from pathlib import Path


# Evaluation inputs, relative to the notebook's working directory. The inference
# cells write their prediction JSON files there with relative paths, so absolute
# per-user paths are unnecessary and break the notebook on any other machine.
GT_JSON_PATH = Path("dataset") / "annotations" / "instances_gt_fixed.json"
PRED1_JSON_PATH = Path("normal_rtdetr_preds.json")
PRED2_JSON_PATH = Path("sahi_rtdetr_preds.json")


# Ground-truth image id to analyse, and the minimum IoU for a match.
IMAGE_ID = 2
IOU_THRESHOLD = 0.5



def xywh_to_xyxy(box):
    """Map a box given as ``[x, y, width, height]`` to ``[x1, y1, x2, y2]``."""
    origin_x, origin_y, span_x, span_y = box
    far_x = origin_x + span_x
    far_y = origin_y + span_y
    return [origin_x, origin_y, far_x, far_y]

def iou(box1, box2):
    """Compute intersection-over-union for two ``[x1, y1, x2, y2]`` boxes.

    A non-positive union yields 0.
    """
    px1, py1, px2, py2 = box1
    qx1, qy1, qx2, qy2 = box2

    # Overlap extent along each axis; negative extents are clamped to 0.
    shared_w = max(0, min(px2, qx2) - max(px1, qx1))
    shared_h = max(0, min(py2, qy2) - max(py1, qy1))
    inter_area = shared_w * shared_h

    p_area = (px2 - px1) * (py2 - py1)
    q_area = (qx2 - qx1) * (qy2 - qy1)
    union = p_area + q_area - inter_area

    if union > 0:
        return inter_area / union
    return 0



# Read the ground truth and the two prediction files being compared.
gt = json.loads(GT_JSON_PATH.read_text())
preds1 = json.loads(PRED1_JSON_PATH.read_text())
preds2 = json.loads(PRED2_JSON_PATH.read_text())


# Collect the corner-format boxes that belong to IMAGE_ID from each source.
gt_boxes = []
for annotation in gt["annotations"]:
    if annotation["image_id"] == IMAGE_ID:
        gt_boxes.append(xywh_to_xyxy(annotation["bbox"]))

pred1_boxes = []
for prediction in preds1:
    if prediction["image_id"] == IMAGE_ID:
        pred1_boxes.append(xywh_to_xyxy(prediction["bbox"]))

pred2_boxes = []
for prediction in preds2:
    if prediction["image_id"] == IMAGE_ID:
        pred2_boxes.append(xywh_to_xyxy(prediction["bbox"]))



def evaluate(gt_boxes, pred_boxes):
    """Match predictions to ground-truth boxes and score the result.

    Predictions are processed in the order given. Each one is matched to the
    still-unmatched ground-truth box it overlaps most, provided that IoU is at
    least ``IOU_THRESHOLD``. Taking the best overlap rather than the first box
    above the threshold avoids using up a ground-truth box that a later
    prediction fits better. Each ground-truth box is matched at most once.

    Args:
        gt_boxes: ground-truth boxes as ``[x1, y1, x2, y2]``.
        pred_boxes: predicted boxes as ``[x1, y1, x2, y2]``.

    Returns:
        ``(tp, fp, fn, precision, recall)``. Precision is 0 when there are no
        predictions and recall is 0 when there are no ground-truth boxes.
    """
    matched_gt = set()
    tp = 0

    for pb in pred_boxes:
        best_idx = None
        best_iou = 0
        for i, gb in enumerate(gt_boxes):
            if i in matched_gt:
                continue
            overlap = iou(pb, gb)
            # Strict ">" keeps the earliest box on ties.
            if overlap >= IOU_THRESHOLD and (best_idx is None or overlap > best_iou):
                best_idx = i
                best_iou = overlap

        if best_idx is not None:
            tp += 1
            matched_gt.add(best_idx)

    fp = len(pred_boxes) - tp
    fn = len(gt_boxes) - tp

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0

    return tp, fp, fn, precision, recall



# Score both prediction sets against the ground truth of the selected image.
p1_tp, p1_fp, p1_fn, p1_p, p1_r = evaluate(gt_boxes, pred1_boxes)
p2_tp, p2_fp, p2_fn, p2_p, p2_r = evaluate(gt_boxes, pred2_boxes)

# The header and footer are built from IMAGE_ID so the report always names the
# image that was actually analysed.
print(f"\n=========== IMAGE {IMAGE_ID} ANALYSIS ===========\n")
print(f"GT cars: {len(gt_boxes)}\n")

print("Model 1 (Normal):")
print(f"  TP={p1_tp}, FP={p1_fp}, FN={p1_fn}")
print(f"   Recall={p1_r:.3f}\n")

print("Model 2 (With SAHI):")
print(f"  TP={p2_tp}, FP={p2_fp}, FN={p2_fn}")
print(f"   Recall={p2_r:.3f}")

print(f"\n✔ Image {IMAGE_ID} comparison complete")




GT cars: 22

Model 1 (Normal):
  TP=13, FP=33, FN=9
   Recall=0.591

Model 2 (With SAHI):
  TP=16, FP=142, FN=6
   Recall=0.727

✔ Image 2 comparison complete
