In [1]:
!pip install -U torch sahi yolov8 ultralytics numpy opencv-python

Defaulting to user installation because normal site-packages is not writeable
Collecting torch
  Using cached torch-2.6.0-cp39-cp39-manylinux1_x86_64.whl.metadata (28 kB)
Collecting numpy
  Using cached numpy-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Collecting opencv-python
  Using cached opencv_python-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting typing-extensions>=4.10.0 (from torch)
  Using cached typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_6

In [1]:
# Import required functions and classes
from sahi import AutoDetectionModel
from sahi.utils.cv import read_image, read_image_as_pil
from sahi.utils.file import Path, increment_path, list_files, save_json, save_pickle, download_from_url
from sahi.predict import get_prediction, get_sliced_prediction, predict, agg_prediction, get_prediction_batched, get_sliced_prediction_batched 
from sahi.prediction import visualize_object_predictions
from IPython.display import Image
from numpy import asarray
import cv2
import os
import json
import numpy as np
from tqdm import tqdm
import time
from sahi.prediction import ObjectPrediction, PredictionResult

In [6]:
# Download YOLOv8-S model to 'models/yolov8s.pt'
yolov8_model_path = 'models/yolov8/last.pt'
#download_yolov8s_model(destination_path=yolov8_model_path)

In [7]:
detection_model = AutoDetectionModel.from_pretrained(
    model_type='yolov8',
    model_path=yolov8_model_path,
    confidence_threshold=0.3,
    device="cuda:0", # or 'cpu'
)

In [8]:
import cv2
import time
from PIL import Image

def get_slice_parameters(object_density, slice_size):
    
    #start_time = time.time()
    #image_path = "test_data/0000006_06773_d_0000018.jpg"
    #image = Image.open(image_path).convert("RGB")
    #image_width, image_height  = image.size
    #print("Image Width:", image_width)
    #print("Image Height:", image_height)
    #min_dim = min(image_width, image_height)
    #slice_size = min_dim // 4 if min_dim > 1600 else min_dim // 2
    #print(f"Dimension calculation time taken: {(time.time() - start_time)*1000:.2f} ms")

    
    if object_density >= 50:
        #slice_size = min_dim // 4
        slice_width = slice_size
        slice_height = slice_size
        overlap_width_ratio = 0.5
        overlap_height_ratio = 0.5
    elif 25 <= object_density < 50:
        #slice_size = min_dim // 2
        slice_width = slice_size
        slice_height = slice_size
        overlap_width_ratio = 0.25
        overlap_height_ratio = 0.25
    elif 10 <= object_density < 25:
        #slice_size = min_dim // 2
        slice_width = slice_size
        slice_height = slice_size
        overlap_width_ratio = 0.15
        overlap_height_ratio = 0.15
    else:
        return

    return slice_width, slice_height, overlap_width_ratio, overlap_height_ratio

In [9]:
# Function to get image details by image_id
def get_image_id(coco_data, image_name):
    for image in coco_data["images"]:
        file_name = Path(image['file_name']).stem
        if file_name == image_name:
            return image['id']
    return None

In [12]:
import os
import cv2
import json
import numpy as np
from tqdm import tqdm
import time
from sahi.prediction import ObjectPrediction, PredictionResult

# export visualization
def predict_sliced_images(input_folder, dataset_json_path, detection_model, slice_size):
    """
    Processes all image files in input_folder:
      - Runs predictions using get_prediction function and detection_model.
      - Saves annotated images with bounding boxes in output_folder.
      - Saves prediction details as JSON files in output_folder.
    
    Parameters:
      input_folder (str): Path to the folder containing images.
      output_folder (str): Path to the folder where results will be saved.
      detection_model: Your detection model used for prediction.
    """
    name = "exp"
    save_dir = Path(increment_path(Path("sliced_predictions") / name, exist_ok=False))
    os.makedirs(save_dir, exist_ok=True)

    if dataset_json_path:
        with open(dataset_json_path, "r") as file:
            data = json.load(file)
    
    #color = (0, 255, 0)  # original annotations in green
    visual_bbox_gt_thickness = 3
    visual_bbox_thickness = 2
    visual_text_size = 0.5
    visual_text_thickness = 1
    visual_hide_labels = False
    visual_hide_conf = False
    visual_export_format = 'png'
    sliced_predictions = []
    image_ids = []
    coco_json = []
    
    # Loop over files in the input folder
    for filename in os.listdir(input_folder):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            image_path = os.path.join(input_folder, filename)
            image_as_pil = read_image_as_pil(image_path)
            filename_without_ext = Path(filename).stem
            
            print("*****************************************")
            print("File Name", filename_without_ext)

            img_id = get_image_id(data, filename_without_ext)
            #image_ids.append(image_id)
            
            # Get predictions from your detection model

            
            time_start = time.time()
            prediction = get_prediction(image_path, detection_model) #changes
            time_end = time.time() - time_start
            #print(f"Intial Prediction Performed in {time_end} seconds")
            print("Intial Prediction time is: {:.2f} ms".format(time_end * 1000))
            
            object_density = len(prediction.object_prediction_list)
            print("Object Density:", object_density)
            
            if object_density > 10:
                slice_width, slice_height, overlap_width_ratio, overlap_height_ratio = get_slice_parameters(object_density, slice_size)
    
                print("********* Slice Parameters ***********")
                print("Slice Width: ", slice_width)
                print("Slice Height: ", slice_height)
                print("Overlap Width Ratio: ", overlap_width_ratio)
                print("Overlap Height Ratio: ", overlap_height_ratio)

                time_start_slice = time.time()
                result_sahi = get_sliced_prediction(
                    image_path,
                    detection_model,
                    slice_height = slice_height,
                    slice_width = slice_width,
                    overlap_height_ratio = overlap_height_ratio,
                    overlap_width_ratio = overlap_width_ratio,
                    postprocess_type = "NMS",
                    verbose = 2
                )
                time_end_slice = time.time() - time_start_slice
                #print(f"Prediction Performed in {time_end1} seconds")
                print("Sliced Prediction time is: {:.2f} ms".format(time_end_slice * 1000))
                
                coco_prediction = result_sahi.to_coco_predictions(image_id=img_id)

                for idx, predict in enumerate(coco_prediction):
                    if coco_prediction[idx]["bbox"]:
                            coco_json.append(predict)
                    
                sliced_predictions.append(result_sahi)
                
                visualize_object_predictions(
                    np.ascontiguousarray(image_as_pil),
                    object_prediction_list=result_sahi.object_prediction_list,
                    rect_th=visual_bbox_thickness,
                    text_size=visual_text_size,
                    text_th=visual_text_thickness,
                    hide_labels=visual_hide_labels,
                    hide_conf=visual_hide_conf,
                    output_dir=save_dir,
                    file_name=filename_without_ext,
                    export_format=visual_export_format,
                )
                
            else:
                print("Prediction time is: {:.2f} ms".format(time_end * 1000))
                
                coco_prediction = prediction.to_coco_predictions(image_id=img_id)

                for idx, predict in enumerate(coco_prediction):
                    if coco_prediction[idx]["bbox"]:
                            coco_json.append(predict)
                    
                sliced_predictions.append(prediction)
                
                visualize_object_predictions(
                    np.ascontiguousarray(image_as_pil),
                    object_prediction_list=prediction.object_prediction_list,
                    rect_th=visual_bbox_thickness,
                    text_size=visual_text_size,
                    text_th=visual_text_thickness,
                    hide_labels=visual_hide_labels,
                    hide_conf=visual_hide_conf,
                    output_dir=save_dir,
                    file_name=filename_without_ext,
                    export_format=visual_export_format,
                )
    total_time = time_end + time_end_slice
                 
    if dataset_json_path:
        save_path = str(save_dir / "result.json")
        save_json(coco_json, save_path)
        print(f"Prediction results are successfully exported to {save_dir}")
    print(f"Prediction Completed Sucessfully: {len(sliced_predictions)} images")
    print("Total Prediction time is: {:.2f} ms".format(total_time * 1000))
    return sliced_predictions

In [13]:
# Example usage: Adaptive-Optimized-NMS
source_folder = './single_test/images'
json_path = "./subset_vis_test_data_1162.json"
slice_size = 512
result_preds_adapt_opt_nms_iou_size = predict_sliced_images(source_folder, json_path, detection_model, slice_size)

*****************************************
File Name 0000011_05068_d_0000008
Intial Prediction time is: 23.96 ms
Object Density: 33
********* Slice Parameters ***********
Slice Width:  512
Slice Height:  512
Overlap Width Ratio:  0.25
Overlap Height Ratio:  0.25
Sliced Boxes Count: 8
POST PROCESS:  NMS
Performing prediction on 8 slices.
Original Prediction Count 93
Final Bounding Box Count (NMS):  31
Final Bounding Box Count (NMS):  6
Final Bounding Box Count (NMS):  2
Final Bounding Box Count (NMS):  1
Final Bounding Box Count (NMS):  6
Final Bounding Box Count (NMS):  1
Final Bounding Box Count (NMS):  6
Slicing performed in 0.006385087966918945 seconds.
Prediction performed in 0.15878868103027344 seconds.
Sliced Prediction time is: 164.20 ms
Prediction results are successfully exported to sliced_predictions/exp227
Prediction Completed Sucessfully: 1 images
Total Prediction time is: 188.16 ms


In [14]:
result_predict_nms_latest = predict(source='./single_test',
                         dataset_json_path = './subset_vis_test_data_1162.json',
                         model_type = 'ultralytics',
                         model_path = 'models/yolov8/last.pt',
                         slice_height = 512,
                         slice_width = 512,
                         overlap_height_ratio = 0.5,
                         overlap_width_ratio = 0.5,
                         postprocess_type = "NMS",
                         verbose = 2
                        )

POST PROCESSING: NMS
indexing coco dataset annotations...


Loading coco annotations: 100%|██████████| 1/1 [00:00<00:00, 515.40it/s]
Performing inference on images:   0%|          | 0/1 [00:00<?, ?it/s]

Image Name: 0000011_05068_d_0000008
Image Size:  (1360, 765)
Sliced Boxes Count: 10
POST PROCESS:  NMS
Performing prediction on 10 slices.


Performing inference on images: 100%|██████████| 1/1 [00:00<00:00,  2.94it/s]

Original Prediction Count 148
Final Bounding Box Count (NMS):  34
Final Bounding Box Count (NMS):  9
Final Bounding Box Count (NMS):  3
Final Bounding Box Count (NMS):  3
Final Bounding Box Count (NMS):  7
Final Bounding Box Count (NMS):  2
Final Bounding Box Count (NMS):  10
Prediction time is: 233.24 ms
Prediction results are successfully exported to runs/predict/exp228
Model loaded in 0.02968883514404297 seconds.
Slicing performed in 0.0010364055633544922 seconds.
Prediction performed in 0.23323941230773926 seconds.
Exporting performed in 0.04594826698303223 seconds.





In [9]:
# Example usage: Adaptive-Optimized-NMS
source_folder = './single_test/images'
json_path = "./subset_visdrone_test_990.json"
slice_size = 512
result_preds_adapt_opt_nms_iou_size = predict_sliced_images(source_folder, json_path, detection_model, slice_size)

*****************************************
File Name 0000006_06773_d_0000018
Intial Prediction time is: 23.62 ms
Object Density: 37
********* Slice Parameters ***********
Slice Width:  512
Slice Height:  512
Overlap Width Ratio:  0.25
Overlap Height Ratio:  0.25
POST PROCESS:  TruncatedNMS
Performing prediction on 8 slices.
Original Prediction Count 110
Adaptive Prediction Count 110
Total Valid prediction:  34
Slicing performed in 0.006056547164916992 seconds.
Prediction performed in 0.18665170669555664 seconds.
Sliced Prediction time is: 192.41 ms
Prediction results are successfully exported to sliced_predictions/exp142
Prediction Completed Sucessfully: 1 images
Total Prediction time is: 216.04 ms


In [11]:
#USING Adaptive-Optimized-NMS-IoU METHOD
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_990.json' --result_json_path './sliced_predictions/exp142/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.429
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.714
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.424
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.832
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.759
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.426
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.390
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.512
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [16]:
# Example usage: Adaptive-Optimized-NMS
source_folder = './single_test/images'
json_path = "./subset_visdrone_test_990.json"
slice_size = 512
result_preds_adapt_opt_nms_iou_size = predict_sliced_images(source_folder, json_path, detection_model, slice_size)

*****************************************
File Name 0000006_06773_d_0000018
Intial Prediction time is: 27.87 ms
Object Density: 37
********* Slice Parameters ***********
Slice Width:  512
Slice Height:  512
Overlap Width Ratio:  0.25
Overlap Height Ratio:  0.25
POST PROCESS:  OptNMS
Performing prediction on 8 slices.
Original Prediction Count 110
Filtered Prediction:  41
Final Bounding Box Count: 20
Filtered Prediction:  8
Final Bounding Box Count: 4
Filtered Prediction:  54
Final Bounding Box Count: 11
Filtered Prediction:  5
Final Bounding Box Count: 3
Filtered Prediction:  2
Final Bounding Box Count: 2
Slicing performed in 0.0072019100189208984 seconds.
Prediction performed in 0.16542506217956543 seconds.
Sliced Prediction time is: 171.24 ms
Prediction results are successfully exported to sliced_predictions/exp140
Prediction Completed Sucessfully: 1 images
Total Prediction time is: 199.11 ms


In [17]:
#USING Adaptive-Optimized-NMS-IoU METHOD
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_990.json' --result_json_path './sliced_predictions/exp140/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.407
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.674
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.416
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.832
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.675
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.574
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.390
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.460
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [10]:
# Example usage: Adaptive-Optimized-NMS
source_folder = './single_test/images'
json_path = "./subset_visdrone_test_990.json"
slice_size = 512
result_preds_adapt_opt_nms_iou_size = predict_sliced_images(source_folder, json_path, detection_model, slice_size)

*****************************************
File Name 0000006_06773_d_0000018
Intial Prediction time is: 27.14 ms
Object Density: 37
********* Slice Parameters ***********
Slice Width:  512
Slice Height:  512
Overlap Width Ratio:  0.25
Overlap Height Ratio:  0.25
POST PROCESS:  TruncatedNMS
Performing prediction on 8 slices.
Original Prediction Count 110
tensor([ True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True, False,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True, False,  True,  True,  True,  True,  True,  True,  True,  True, False,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True, False,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True, False,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True,  True, False,  True,  True,  True,  True,  True,
         True

In [12]:
#USING Adaptive-Optimized-NMS-IoU METHOD
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_990.json' --result_json_path './sliced_predictions/exp139/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.356
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.613
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.379
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.832
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.639
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.426
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.336
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.429
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [10]:
#USING Adaptive-Optimized-NMS-IoU METHOD
!sahi coco evaluate --dataset_json_path './subset_vis_test_data_1563.json' --result_json_path './sliced_predictions/exp134/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.02s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.211
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.360
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.213
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.242
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.697
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.417
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.106
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.371
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [10]:
#USING Adaptive-Optimized-NMS-IoU METHOD
!sahi coco evaluate --dataset_json_path './subset_vis_test_data_1563.json' --result_json_path './sliced_predictions/exp134/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.02s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.211
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.360
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.213
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.242
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.697
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.417
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.106
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.371
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [11]:
# Example usage: Adaptive-Optimized-NMS
source_folder = './single_test/images_bk'
json_path = "./subset_visdrone_test_990.json"
slice_size = 512
result_preds_adapt_opt_nms_iou_size = predict_sliced_images(source_folder, json_path, detection_model, slice_size)

*****************************************
File Name 0000006_06773_d_0000018
Intial Prediction time is: 26.86 ms
Object Density: 37
********* Slice Parameters ***********
Slice Width:  512
Slice Height:  512
Overlap Width Ratio:  0.25
Overlap Height Ratio:  0.25
POST PROCESS:  TruncatedNMS
Performing prediction on 8 slices.
Original Prediction Count 110
Truncated Final Bounding Box Count: 28
Slicing performed in 0.007302045822143555 seconds.
Prediction performed in 0.1595766544342041 seconds.
Sliced Prediction time is: 165.10 ms
Prediction results are successfully exported to sliced_predictions/exp135
Prediction Completed Sucessfully: 1 images
Total Prediction time is: 191.96 ms


In [13]:
#USING Adaptive-Optimized-NMS-IoU METHOD
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_990.json' --result_json_path './sliced_predictions/exp135/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.396
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.685
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.397
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 1.000
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.675
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.426
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.407
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.460
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [16]:
result_predict_nms_1 = predict(source='./single_test',
                         dataset_json_path = './subset_visdrone_test_990.json',
                         model_type = 'ultralytics',
                         model_path = 'models/yolov8/last.pt',
                         slice_height = 512,
                         slice_width = 512,
                         overlap_height_ratio = 0.5,
                         overlap_width_ratio = 0.5,
                         postprocess_type = "NMS",
                         verbose = 2
                        )

POST PROCESSING: NMS
indexing coco dataset annotations...


Loading coco annotations: 100%|██████████| 1/1 [00:00<00:00, 409.12it/s]
Performing inference on images:   0%|          | 0/1 [00:00<?, ?it/s]

Image Name: 0000006_06773_d_0000018
Image Size:  (1360, 765)
POST PROCESS:  NMS
Performing prediction on 10 slices.


Performing inference on images: 100%|██████████| 1/1 [00:00<00:00,  1.94it/s]

Total Valid prediction:  1
Total Valid prediction:  21
Total Valid prediction:  8
Total Valid prediction:  13
Total Valid prediction:  3
Total Valid prediction:  2
Prediction time is: 402.36 ms
Prediction results are successfully exported to runs/predict/exp172
Model loaded in 0.031191587448120117 seconds.
Slicing performed in 0.0011489391326904297 seconds.
Prediction performed in 0.4023559093475342 seconds.
Exporting performed in 0.05055832862854004 seconds.





In [17]:
#USING Adaptive-Optimized-NMS-IoU METHOD
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_990.json' --result_json_path './runs/predict/exp172/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.399
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.699
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.376
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 1.000
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.672
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.574
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.407
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.446
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [13]:
result_predict_nms_2 = predict(source='./single_test',
                         dataset_json_path = './subset_visdrone_test_990.json',
                         model_type = 'ultralytics',
                         model_path = 'models/yolov8/last.pt',
                         slice_height = 512,
                         slice_width = 512,
                         overlap_height_ratio = 0.5,
                         overlap_width_ratio = 0.5,
                         postprocess_type = "NMS",
                         verbose = 2
                        )

POST PROCESSING: NMS
indexing coco dataset annotations...


Loading coco annotations: 100%|██████████| 1/1 [00:00<00:00, 362.11it/s]
Performing inference on images:   0%|          | 0/1 [00:00<?, ?it/s]

Image Name: 0000006_06773_d_0000018
Image Size:  (1360, 765)
POST PROCESS:  NMS
Performing prediction on 10 slices.


Performing inference on images: 100%|██████████| 1/1 [00:00<00:00,  2.84it/s]

Total Valid prediction:  1
Total Valid prediction:  21
Total Valid prediction:  8
Total Valid prediction:  13
Total Valid prediction:  3
Total Valid prediction:  2
Prediction time is: 239.31 ms
Prediction results are successfully exported to runs/predict/exp173
Model loaded in 0.07065629959106445 seconds.
Slicing performed in 0.0011546611785888672 seconds.
Prediction performed in 0.23931050300598145 seconds.
Exporting performed in 0.04933500289916992 seconds.





In [12]:
result_predict_nms_3 = predict(source='./single_test',
                         dataset_json_path = './subset_visdrone_test_990.json',
                         model_type = 'ultralytics',
                         model_path = 'models/yolov8/last.pt',
                         slice_height = 512,
                         slice_width = 512,
                         overlap_height_ratio = 0.5,
                         overlap_width_ratio = 0.5,
                         postprocess_type = "NMS",
                         verbose = 2
                        )

POST PROCESSING: NMS
indexing coco dataset annotations...


Loading coco annotations: 100%|██████████| 1/1 [00:00<00:00, 377.19it/s]
Performing inference on images:   0%|          | 0/1 [00:00<?, ?it/s]

Image Name: 0000006_06773_d_0000018
Image Size:  (1360, 765)
POST PROCESS:  NMS
Performing prediction on 10 slices.


Performing inference on images: 100%|██████████| 1/1 [00:00<00:00,  2.88it/s]

Total Valid prediction:  1
Total Valid prediction:  21
Total Valid prediction:  8
Total Valid prediction:  13
Total Valid prediction:  3
Total Valid prediction:  2
Prediction time is: 233.85 ms
Prediction results are successfully exported to runs/predict/exp174
Model loaded in 0.0613703727722168 seconds.
Slicing performed in 0.0011582374572753906 seconds.
Prediction performed in 0.23385357856750488 seconds.
Exporting performed in 0.04917120933532715 seconds.





In [14]:
#USING Adaptive-Optimized-NMS-IoU METHOD
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_990.json' --result_json_path './runs/predict/exp174/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.399
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.699
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.376
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 1.000
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.672
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.574
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.407
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.446
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [14]:
#USING Adaptive-Optimized-NMS-IoU METHOD
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_990.json' --result_json_path './runs/predict/exp173/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.399
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.699
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.376
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 1.000
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.672
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.574
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.407
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.446
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [18]:
def area(box):
    """
    Compute the area of a box.
    Box format: [x1, y1, x2, y2]
    """
    width = max(0, box[2] - box[0])
    height = max(0, box[3] - box[1])
    return width * height

def intersection_area(boxA, boxB):
    """
    Compute the area of intersection between two boxes.
    """
    x_left   = max(boxA[0], boxB[0])
    y_top    = max(boxA[1], boxB[1])
    x_right  = min(boxA[2], boxB[2])
    y_bottom = min(boxA[3], boxB[3])
    
    if x_right < x_left or y_bottom < y_top:
        return 0
    return (x_right - x_left) * (y_bottom - y_top)

def iou(boxA, boxB):
    """
    Compute the Intersection-over-Union (IOU) between two boxes.
    """
    inter = intersection_area(boxA, boxB)
    union = area(boxA) + area(boxB) - inter
    if union == 0:
        return 0
    return inter / union

def truncated_nms(boxes, scores, IOUt, IOIt, IOOt):
    """
    Performs Truncated Non-Maximum Suppression.
    
    Inputs:
      boxes: list of detection boxes (each box is [x1, y1, x2, y2])
      scores: list of detection scores corresponding to each box
      IOUt: IOU threshold for union
      IOIt: inside box threshold (for ratio of intersection area to box M)
      IOOt: outside box threshold (for ratio of intersection area to box b_i)
    
    Returns:
      K: list of boxes selected after suppression.
    
    Note:
      This implementation follows one interpretation of the provided pseudocode.
    """
    K = []
    # Work with a list of (box, score) pairs for easier management.
    boxes_scores = list(zip(boxes, scores))
    
    while boxes_scores:
        # Select box M with the highest score.
        idx = max(range(len(boxes_scores)), key=lambda i: boxes_scores[i][1])
        M, score_M = boxes_scores.pop(idx)
        
        new_boxes_scores = []
        added_M = False  # flag to ensure M is added only once.
        
        # Process each remaining box b_i.
        for box, score in boxes_scores:
            inter = intersection_area(M, box)
            area_M = area(M)
            area_box = area(box)
            
            # To avoid division by zero, if area is zero, set conditions to False.
            if area_M == 0 or area_box == 0:
                cond1 = False
                cond2 = False
            else:
                # Compute the ratios.
                ratio_M = inter / area_M      # Intersection relative to M.
                ratio_box = inter / area_box  # Intersection relative to b_i.
                
                cond1 = (ratio_M > IOIt) and (ratio_box <= IOOt)
                cond2 = (ratio_M <= IOOt) and (ratio_box > IOIt)
            
            cond = cond1 or cond2
            current_iou = iou(M, box)
            
            if not cond1:
                # In this branch the pseudocode suggests to add M to K.
                if not added_M:
                    K.append(M)
                    added_M = True
                # If IOU is low and the combined condition is false, keep b_i.
                if current_iou <= IOUt and (not cond):
                    new_boxes_scores.append((box, score))
                # Otherwise, b_i is removed (i.e. not appended).
            else:
                # If cond1 is true, then remove b_i only if IOU is high.
                if current_iou < IOUt:
                    new_boxes_scores.append((box, score))
                # If IOU >= IOUt, b_i is dropped.
        
        boxes_scores = new_boxes_scores  # Update the list with surviving boxes.
    
    return K

# Example usage:
if __name__ == "__main__":
    # Example boxes and scores (format: [x1, y1, x2, y2])
    boxes = [
        [10, 10, 50, 50],
        [12, 12, 48, 48],
        [60, 60, 100, 100]
    ]
    scores = [0.9, 0.85, 0.8]
    
    # Thresholds (example values)
    IOUt = 0.5   # IOU threshold for union
    IOIt = 0.6   # inside box threshold
    IOOt = 0.4   # outside box threshold
    
    selected_boxes = truncated_nms(boxes, scores, IOUt, IOIt, IOOt)
    print("Selected boxes:", selected_boxes)


Selected boxes: [[10, 10, 50, 50]]


In [23]:
import torch

def area(box):
    """
    Compute the area of a box.
    Box format: [x1, y1, x2, y2]
    """
    width = max(0, box[2] - box[0])
    height = max(0, box[3] - box[1])
    return width * height

def intersection_area(boxA, boxB):
    """
    Compute the intersection area between two boxes.
    """
    x_left   = max(boxA[0], boxB[0])
    y_top    = max(boxA[1], boxB[1])
    x_right  = min(boxA[2], boxB[2])
    y_bottom = min(boxA[3], boxB[3])
    if x_right < x_left or y_bottom < y_top:
        return 0
    return (x_right - x_left) * (y_bottom - y_top)

def iou(boxA, boxB):
    """
    Compute the Intersection-over-Union (IOU) between two boxes.
    """
    inter = intersection_area(boxA, boxB)
    union = area(boxA) + area(boxB) - inter
    if union == 0:
        return 0
    return inter / union

def optimized_truncated_nms(predictions: torch.Tensor, 
                            adaptive_iou: torch.Tensor, 
                            match_metric: str = "IOU", 
                            conf_threshold: float = 0.3, 
                            min_area: float = 64,
                            IOIt: float = 0.6, 
                            IOOt: float = 0.4):
    """
    Integrated Optimized and Truncated NMS.
    
    This function first assumes that predictions have been filtered 
    (e.g., by confidence and area) and then applies a truncated suppression 
    logic using both an adaptive IoU threshold (per candidate) and additional 
    criteria (IOIt, IOOt) for inside/outside box ratios.
    
    Args:
        predictions: Tensor of shape [num_boxes, 6] (x1, y1, x2, y2, score, category_id).
        adaptive_iou: Tensor of adaptive IoU thresholds for each prediction.
        match_metric: "IOU" or "IOS". (Currently only "IOU" is used in the truncated logic.)
        conf_threshold: Confidence threshold (assumes predictions are already filtered).
        min_area: Minimum area required for a valid prediction.
        IOIt: Threshold for the intersection ratio relative to the selected box (M).
        IOOt: Threshold for the intersection ratio relative to a candidate box.
        
    Returns:
        A list of indices (referring to the input predictions) for the kept predictions.
    """
    print("Filtered Prediction Count:", len(predictions))
    if predictions.numel() == 0:
        return []

    # (Optional) You could filter predictions here based on conf_threshold and min_area.
    # For this integrated implementation, we assume predictions are already filtered.

    # Convert boxes and scores from predictions to Python lists.
    # We assume predictions[:, :4] holds [x1, y1, x2, y2] and predictions[:, 4] holds scores.
    boxes = predictions[:, :4].tolist()
    scores = predictions[:, 4].tolist()
    # Track original indices so we can return them.
    indices = list(range(len(boxes)))
    # Build a list of tuples: (box, score, index)
    boxes_scores = list(zip(boxes, scores, indices))

    kept_indices = []

    while boxes_scores:
        # Select the box M with the highest score.
        idx = max(range(len(boxes_scores)), key=lambda i: boxes_scores[i][1])
        M, score_M, index_M = boxes_scores.pop(idx)
        new_boxes_scores = []
        added_M = False  # Ensure M is added only once to the kept list.

        for (box, score, index) in boxes_scores:
            inter = intersection_area(M, box)
            a_M = area(M)
            a_box = area(box)
            if a_M == 0 or a_box == 0:
                cond1 = False
                cond2 = False
            else:
                ratio_M = inter / a_M      # Intersection ratio relative to M.
                ratio_box = inter / a_box  # Intersection ratio relative to the candidate.
                cond1 = (ratio_M > IOIt) and (ratio_box <= IOOt)
                cond2 = (ratio_M <= IOOt) and (ratio_box > IOIt)
            cond = cond1 or cond2
            current_iou = iou(M, box)
            # Use the candidate's adaptive IoU threshold.
            # (If adaptive_iou is not available for an index, a default (e.g. 0.5) could be used.)
            #adaptive_thresh = adaptive_iou[index].item() if adaptive_iou.numel() > index else 0.5

            if not cond1:
                if not added_M:
                    kept_indices.append(index_M)
                    added_M = True
                # If the overlap is within the adaptive threshold and the extra condition is false, keep candidate.
                if current_iou <= adaptive_thresh and (not cond):
                    new_boxes_scores.append((box, score, index))
            else:
                # If cond1 is true, keep the candidate only if IOU is below the adaptive threshold.
                if current_iou < adaptive_thresh:
                    new_boxes_scores.append((box, score, index))
        boxes_scores = new_boxes_scores

    print("Final Bounding Box Count:", len(kept_indices))
    return kept_indices

# Example usage:
if __name__ == "__main__":
    # Create an example predictions tensor.
    # Each row: [x1, y1, x2, y2, score, category_id]
    predictions = torch.tensor([
        [10, 10, 50, 50, 0.9, 1],
        [12, 12, 48, 48, 0.85, 1],
        [60, 60, 100, 100, 0.8, 2]
    ])
    # For demonstration, set an adaptive IoU threshold per prediction.
    adaptive_iou = torch.tensor([0.5, 0.5, 0.5], dtype=torch.float32)
    
    kept = optimized_truncated_nms(predictions, adaptive_iou,
                                   match_metric="IOU", 
                                   conf_threshold=0.3, 
                                   min_area=64, 
                                   IOIt=0.6, 
                                   IOOt=0.4)
    print("Kept prediction indices:", kept)

Filtered Prediction Count: 3
Final Bounding Box Count: 1
Kept prediction indices: [0]


In [19]:
import torch
def truncated_nms(predictions: torch.tensor,
                  IoU_threshold:float,
                  IoI_threshold:float,
                  IoO_threshold:float,
                  match_metric: str = "IOU", 
                  conf_threshold: float = 0.3, 
                  min_area: float = 64):
    """
    Optimized NMS that first filters out low-confidence and small-area predictions.
    Args:
        predictions: Tensor of shape [num_boxes, 6] (x1, y1, x2, y2, score, category_id).
        match_metric: "IOU" or "IOS".
        match_threshold: IoU/IOS threshold for suppression.
        conf_threshold: Confidence threshold to filter predictions.
        min_area: Minimum area required for a valid prediction.
    Returns:
        A list of indices for the kept predictions.
    """
    # Filter out low-quality predictions first
    #print("Prediction Count Original: ", len(predictions))
    #predictions = filter_predictions(predictions, conf_threshold, 64)
   
    #predictions = filter_border_predictions(predictions, image_size, 5)
    
    if predictions.numel() == 0:
        return []
    
    x1 = predictions[:, 0]
    y1 = predictions[:, 1]
    x2 = predictions[:, 2]
    y2 = predictions[:, 3]
    scores = predictions[:, 4]
    areas = (x2 - x1) * (y2 - y1)
    
    # Sort the predictions by their confidence scores (ascending order)
    order = scores.argsort()
    keep = []

    while order.numel() > 0:
        indices_to_remove = []
        idx = order[-1]  # index of highest score prediction
        bm = predictions[idx, :4] #prediction with highest score
        bm_area = areas[idx]
        current_idx = idx.item()
        added_bm = False 

        # Remove the chosen box from the order
        order = order[:-1]
        if order.numel() == 0:
            keep.append(current_idx)
            break

        # Gather remaining boxes
        xx1 = torch.index_select(x1, 0, order)
        yy1 = torch.index_select(y1, 0, order)
        xx2 = torch.index_select(x2, 0, order)
        yy2 = torch.index_select(y2, 0, order)

        # Compute intersection coordinates
        xx1 = torch.max(xx1, x1[idx])
        yy1 = torch.max(yy1, y1[idx])
        xx2 = torch.min(xx2, x2[idx])
        yy2 = torch.min(yy2, y2[idx])

        # Compute width and height of intersection
        w = torch.clamp(xx2 - xx1, min=0.0)
        h = torch.clamp(yy2 - yy1, min=0.0)
        inter = w * h

        rem_areas = torch.index_select(areas, 0, order)

        # Calculate the two ratios for truncated NMS logic:
        ratio1 = inter / (bm_area + 1e-9) # Intersection relative to bm's area
        ratio2 = inter / (rem_areas + 1e-9) # Intersection relative to each remaining box's area
  
        #iou_thresholds = torch.index_select(adaptive_iou, 0, order)
        
        #print("adaptive_iou", iou_thresholds)
        # Keep boxes with IoU/IOS less than their respective adaptive threshold
        #mask = match_metric_value < iou_thresholds
        
        #Truncated NMS Conditions
        #This checks if a significant portion of 𝑀 overlaps 𝑏𝑖
        cond1 = (ratio1 > IoI_threshold) & (ratio2 <= IoO_threshold)

        #This is complementary to Condition 1. Alternatively, if the fraction of bm covered is less than or equal to IoO_threshold while the remaining box's overlap is above IoI_threshold.

        cond2 = (ratio1 <= IoO_threshold) & (ratio2 > IoI_threshold)

        cond = (cond1 | cond2)

        if match_metric == "IOU":
            union = (rem_areas - inter) + areas[idx]
            match_metric_value = inter / (union + 1e-9)
        elif match_metric == "IOS":
            smaller = torch.min(rem_areas, areas[idx])
            match_metric_value = inter / (smaller + 1e-9)
        else:
            raise ValueError("Unsupported match_metric. Use 'IOU' or 'IOS'.")

        #We add only outside boxes and remove inside boxes
        
        #if torch.any(~(cond)):  #bm is an outside box          #if not torch.all(cond):
        #    keep.append(current_idx)
        
        if not cond1.any():
            if not added_bm:
                keep.append(current_idx)
                added_bm = True

        # Keep boxes with IoU/IOS less than the threshold
        mask1 = match_metric_value <= IoU_threshold
        mask2 = match_metric_value >= IoU_threshold
            # If the overlap is within the adaptive threshold and the extra condition is false, keep candidate.
            #if mask1 & (not torch.all(cond)):
             #   print("step2")
              #  order = order[mask1]
        #else:
          #  if mask2:
           #      print("step3") 
            #     order = order[mask2]
        
        keep_mask = (match_metric_value <= IoU_threshold) & (~cond)
        #print(keep_mask)
        order = order[keep_mask]
        
    print("Truncated Final Bounding Box Count:", len(keep))
    return keep

In [20]:
# Example usage:
if __name__ == "__main__":
    # Create an example predictions tensor.
    # Each row: [x1, y1, x2, y2, score, category_id]
    predictions = torch.tensor([
        [10, 10, 50, 50, 0.9, 1],
        [12, 12, 48, 48, 0.85, 1],
        [60, 60, 100, 100, 0.8, 2]
    ])
    # For demonstration, set an adaptive IoU threshold per prediction.
    adaptive_iou = torch.tensor([0.5, 0.5, 0.5], dtype=torch.float32)
    
    kept = truncated_nms(predictions, IoU_threshold = 0.5,
                                   match_metric="IOU", 
                                   conf_threshold=0.3, 
                                   min_area=64, 
                                   IoI_threshold=0.6, 
                                   IoO_threshold=0.4)
    print("Kept prediction indices:", kept)


Truncated Final Bounding Box Count: 2
Kept prediction indices: [0, 2]


In [4]:
import numpy as np

def iou(box1, box2):
    """
    Calculate the Intersection Over Union (IOU) between two bounding boxes.
    
    box1 and box2 are tuples (x1, y1, x2, y2) representing the coordinates of the top-left and bottom-right corners.
    """
    x1_intersection = max(box1[0], box2[0])
    y1_intersection = max(box1[1], box2[1])
    x2_intersection = min(box1[2], box2[2])
    y2_intersection = min(box1[3], box2[3])

    # Calculate area of intersection
    intersection_width = max(0, x2_intersection - x1_intersection)
    intersection_height = max(0, y2_intersection - y1_intersection)
    intersection_area = intersection_width * intersection_height

    # Calculate area of both boxes
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

    # IOU = intersection area / union area
    union_area = area1 + area2 - intersection_area
    return intersection_area / union_area if union_area > 0 else 0

def truncated_nms(B, S, IOUt, IOIt, IOOt):
    """
    Perform truncated Non-Maximum Suppression (NMS) on the list of bounding boxes and their scores.
    
    Parameters:
        B: List of bounding boxes.
        S: List of corresponding detection scores.
        IOUt: IOU threshold to keep boxes.
        IOIt: Inside box threshold for truncated NMS.
        IOOt: Outside box threshold for truncated NMS.
        
    Returns:
        K: List of bounding boxes after truncated NMS.
    """
    K = []  # Initialize the list of selected boxes

    while B:
        # Select the box with the highest score
        m = np.argmax(S)
        M = B[m]
        
        # Remove the selected box from the list
        B.pop(m)
        S.pop(m)
        
        # Iterate over the remaining boxes
        for i, bi in enumerate(B[:]):
            # Check if the intersection condition is met
            intersection_area = iou(M, bi)
            condition1 = intersection_area > IOIt and intersection_area < iou(M, bi) <= IOOt
            condition2 = intersection_area <= IOOt and intersection_area > IOIt
            
            condition = condition1 or condition2

            if not condition:
                K.append(M)  # Keep the box in K
                if iou(M, bi) <= IOUt and not condition:
                    # Remove box if IOU condition fails
                    B.pop(i)
                    S.pop(i)
                else:
                    # Otherwise, remove the box and score from B and S
                    B.pop(i)
                    S.pop(i)
            else:
                # Keep the box for further iteration
                if iou(M, bi) >= IOUt:
                    B.pop(i)
                    S.pop(i)
                
    return K

# Example usage
B = [(0, 0, 50, 50), (10, 10, 60, 60), (30, 30, 80, 80)]  # Example bounding boxes
S = [0.9, 0.85, 0.8]  # Corresponding detection scores
IOUt = 0.5  # IOU threshold
IOIt = 0.3  # Inside box threshold
IOOt = 0.7  # Outside box threshold

K = truncated_nms(B, S, IOUt, IOIt, IOOt)
print("Selected Boxes:", K)

Selected Boxes: [(0, 0, 50, 50)]


In [46]:
import numpy as np

def intersection(box1, box2):
    """Computes the intersection area between two bounding boxes."""
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])
    
    inter_width = max(0, x2 - x1)
    inter_height = max(0, y2 - y1)
    
    return inter_width * inter_height

def iou(box1, box2):
    """Computes the Intersection over Union (IoU) between two bounding boxes."""
    inter = intersection(box1, box2)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    
    union = area1 + area2 - inter
    return inter / union if union > 0 else 0

def truncated_nms(predictions:torch.tensor, IOUt, IOIt, IOOt):
    """
    Performs Truncated Non-Maximum Suppression.
    
    Args:
        B (list of list): List of bounding boxes [x1, y1, x2, y2].
        S (list of float): Corresponding detection scores.
        IOUt (float): IoU threshold.
        IOIt (float): Intersection over Inside Box threshold.
        IOOt (float): Intersection over Outside Box threshold.
    
    Returns:
        list: Filtered list of bounding boxes after Truncated NMS.
    """
    K = []  # Output list of selected bounding boxes
    num_boxes = predictions.shape[0]
    print(num_boxes)
    
    for idx in range(len(predictions)):
        B = predictions[idx, :4]
        S = predictions[idx, 4]

        while len(B) > 0:
            m = np.argmax(S)  # Get index of highest score
            M = B[m]  # Select the corresponding box
            K.append(M)  # Add it to the final selection
            
            B.pop(m)  # Remove selected box from B
            S.pop(m)  # Remove corresponding score
    
            new_B = []
            new_S = []
            
            for i in range(len(B)):
                bi = B[i]
                inter = intersection(M, bi)
                
                # Compute conditions
                condition1 = (inter / ((M[2] - M[0]) * (M[3] - M[1])) > IOIt) and (inter / ((bi[2] - bi[0]) * (bi[3] - bi[1])) <= IOOt)
                condition2 = (inter / ((M[2] - M[0]) * (M[3] - M[1])) <= IOOt) and (inter / ((bi[2] - bi[0]) * (bi[3] - bi[1])) > IOIt)
                condition = condition1 or condition2
    
                if iou(M, bi) <= IOUt and not condition:
                    new_B.append(bi)
                    new_S.append(S[i])
                elif iou(M, bi) >= IOUt:
                    continue  # Discard the box
    
            B = new_B
            S = new_S

    return K

In [None]:
predictions = torch.tensor([
    [10, 10, 50, 50, 0.9, 1],
    [12, 12, 48, 48, 0.85, 1],
    [60, 60, 100, 100, 0.8, 2]
])
IOUt = 0.5
IOIt = 0.3
IOOt = 0.7

filtered_boxes = truncated_nms(predictions, IOUt, IOIt, IOOt)
print(filtered_boxes)

In [52]:
import torch

def truncated_nms_2(
    predictions: torch.Tensor,
    match_metric: str = "IOU",
    IOUt: float = 0.5,  # IoU threshold for suppression
    IOIt: float = 0.5,  # Intersection over Inside threshold
    IOOt: float = 0.5,  # Intersection over Outside threshold
):
    """
    Perform Truncated Non-Maximum Suppression (NMS).
    
    Args:
        predictions: (tensor) The location preds for the image
            along with the class predscores, Shape: [num_boxes, 5].
        match_metric: (str) 'IOU' or 'IOS'
        IOUt: (float) IoU threshold for the final decision.
        IOIt: (float) Intersection over Inside Box threshold.
        IOOt: (float) Intersection over Outside Box threshold.

    Returns:
        A list of filtered indexes, Shape: [ ,]
    """

    # Extract coordinates for each prediction box
    x1 = predictions[:, 0]
    y1 = predictions[:, 1]
    x2 = predictions[:, 2]
    y2 = predictions[:, 3]

    # Extract the confidence scores as well
    scores = predictions[:, 4]

    # Calculate the area of each bounding box
    areas = (x2 - x1) * (y2 - y1)
    
    # Sort the prediction boxes based on their confidence scores
    order = scores.argsort()

    # Initialize an empty list for filtered prediction boxes
    keep = []

    while len(order) > 0:
        # Extract the index of the prediction with the highest score
        idx = order[-1]

        # Add this prediction to the filtered list
        keep.append(idx.item())  # Use .item() to append the value directly

        # Remove the selected box from further consideration
        order = order[:-1]

        # Sanity check: if there are no boxes left to process, break out
        if len(order) == 0:
            break

        # Select coordinates of the remaining boxes based on 'order'
        xx1 = x1[order]
        yy1 = y1[order]
        xx2 = x2[order]
        yy2 = y2[order]

        # Find the coordinates of the intersection boxes
        xx1 = torch.max(xx1, x1[idx])
        yy1 = torch.max(yy1, y1[idx])
        xx2 = torch.min(xx2, x2[idx])
        yy2 = torch.min(yy2, y2[idx])

        # Find the width and height of the intersection boxes
        w = xx2 - xx1
        h = yy2 - yy1

        # Clamp to avoid negative width/height (non-overlapping boxes)
        w = torch.clamp(w, min=0.0)
        h = torch.clamp(h, min=0.0)

        # Calculate the intersection area
        inter = w * h

        # Calculate the areas of the remaining boxes
        rem_areas = areas[order]

        # Calculate the Intersection over Union (IoU) or Intersection over Smaller Box (IoS)
        if match_metric == "IOU":
            # Calculate the union of every prediction and the selected prediction
            union = (rem_areas - inter) + areas[idx]
            # Compute the IoU of each prediction with the selected one
            match_metric_value = inter / union

        elif match_metric == "IOS":
            # Calculate the smaller area between the current and selected boxes
            smaller = torch.min(rem_areas, areas[idx])
            # Compute the IoS of each prediction with the selected one
            match_metric_value = inter / smaller
        else:
            raise ValueError(f"Unknown match metric: {match_metric}")

        # Calculate the intersection areas relative to the box S (idx)
        inter_area_M = inter / ((x2[idx] - x1[idx]) * (y2[idx] - y1[idx]))  # Relative to box M
        inter_area_bi = inter / ((xx2 - xx1) * (yy2 - yy1))  # Relative to box bi
        
        # Apply conditions based on the truncated NMS logic
        condition1 = (inter_area_M > IOIt) and (inter_area_bi <= IOOt)
        condition2 = (inter_area_M <= IOOt) and (inter_area_bi > IOIt)
        condition = condition1 or condition2

        if not condition1.any():
            keep.append(idx.item())
        
        # Keep boxes that have IoU/Ios lower than the threshold and satisfy the conditions
        mask = (match_metric_value < IOUt) & ~condition
        order = order[mask]

    print("Total valid predictions: ", len(keep))
    return keep

In [56]:
import numpy as np

# Function to calculate IoU (Intersection over Union)
def iou(box1, box2):
    """
    Compute Intersection over Union (IoU) between two bounding boxes.
    Assumes boxes are in the format [x1, y1, x2, y2].
    """
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    intersection_area = max(0, x2 - x1) * max(0, y2 - y1)
    
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])

    union_area = box1_area + box2_area - intersection_area

    return intersection_area / union_area if union_area > 0 else 0

# Truncated NMS Implementation
def truncated_nms_lt(predictions:torch.tensor, IOUt, IOIt, IOOt):
    """
    Perform Truncated Non-Maximum Suppression.
    
    Parameters:
    B - List of bounding boxes (each box as [x1, y1, x2, y2])
    S - List of corresponding confidence scores
    IOUt - IoU threshold
    IOIt - Inside IoU threshold
    IOOt - Outside IoU threshold
    
    Returns:
    K - Filtered list of bounding boxes
    """
    B = predictions[,:4]
    S = predictions[, 4]
    K = []  # Output list of selected boxes
    
    while len(B) > 0:
        # Find the box with the highest score
        m = np.argmax(S)
        M = B[m]

        # Remove the selected box from B and S
        B.pop(m)
        S.pop(m)

        # Check remaining boxes
        new_B = []
        new_S = []
        
        for i in range(len(B)):
            bi = B[i]
            iou_M_bi = iou(M, bi)

            # Condition 1: M > IOIt and IoU(M, bi) <= IOOt
            condition1 = (iou_M_bi > IOIt) and (iou_M_bi <= IOOt)

            # Condition 2: M <= IOOt and IoU(M, bi) > IOIt
            condition2 = (iou_M_bi <= IOOt) and (iou_M_bi > IOIt)

            if not condition1:
                K.append(M)

            # If IoU is within threshold, remove bi; otherwise, keep it
            if not ((iou_M_bi <= IOUt) and not (condition1 or condition2)):
                new_B.append(bi)
                new_S.append(S[i])

        B = new_B
        S = new_S

    return K

# Example usage
predictions = torch.tensor([
    [10, 10, 50, 50, 0.9, 1],
    [12, 12, 48, 48, 0.85, 1],
    [60, 60, 100, 100, 0.8, 2]
])

IOUt = 0.7
IOIt = 0.5
IOOt = 0.3

result = truncated_nms_lt(predictions, IOUt, IOIt, IOOt)
print("Filtered Boxes:", result)

SyntaxError: invalid syntax (704029454.py, line 38)

In [1]:
import torch

def truncated_nms_merge(
    predictions: torch.Tensor,
    match_metric: str = "IOU",
    IOUt: float = 0.7,  # Truncation IoU threshold for keeping boxes
    IOIt: float = 0.5,  # Inside IoU threshold
    IOOt: float = 0.3   # Outside IoU threshold
):
    """
    Apply truncated non-maximum suppression to avoid detecting too many
    overlapping bounding boxes for a given object, with added truncation logic.

    Args:
        predictions (tensor): The location preds for the image along with the class scores, Shape: [num_boxes, 5].
        match_metric (str): IOU or IOS (Intersection over Area or Intersection over Union)
        match_threshold (float): The overlap threshold for match metric.
        IOUt (float): Intersection over Union threshold for truncation (threshold to keep boxes)
        IOIt (float): Inside Intersection over Union threshold (threshold to keep inside box)
        IOOt (float): Outside Intersection over Union threshold (threshold for outside box)
    
    Returns:
        List: A list of filtered indexes
    """
    # Extract coordinates for every prediction box present in P
    x1 = predictions[:, 0]
    y1 = predictions[:, 1]
    x2 = predictions[:, 2]
    y2 = predictions[:, 3]

    # Extract the confidence scores
    scores = predictions[:, 4]

    # Calculate area of every box
    areas = (x2 - x1) * (y2 - y1)

    # Sort the prediction boxes in P according to their confidence scores
    order = scores.argsort()

    # Initialize an empty list for filtered prediction boxes
    keep = []

    while len(order) > 0:
        # Extract the index of the prediction with the highest score (S)
        idx = order[-1]

        # Push S in filtered predictions list
        keep.append(idx.tolist())

        # Remove S from P
        order = order[:-1]

        # Sanity check
        if len(order) == 0:
            break

        # Select coordinates of remaining boxes according to the indices in order
        xx1 = torch.index_select(x1, dim=0, index=order)
        xx2 = torch.index_select(x2, dim=0, index=order)
        yy1 = torch.index_select(y1, dim=0, index=order)
        yy2 = torch.index_select(y2, dim=0, index=order)

        # Find the coordinates of the intersection boxes
        xx1 = torch.max(xx1, x1[idx])
        yy1 = torch.max(yy1, y1[idx])
        xx2 = torch.min(xx2, x2[idx])
        yy2 = torch.min(yy2, y2[idx])

        # Find height and width of the intersection boxes
        w = xx2 - xx1
        h = yy2 - yy1

        # Take max with 0.0 to avoid negative width and height
        w = torch.clamp(w, min=0.0)
        h = torch.clamp(h, min=0.0)

        # Find the intersection area
        inter = w * h

        # Find the areas of the remaining boxes according to the indices in order
        rem_areas = torch.index_select(areas, dim=0, index=order)

        # Calculate the match metric value (IoU or IoS)
        if match_metric == "IOU":
            # Find the union of every prediction T in P with the prediction S
            union = (rem_areas - inter) + areas[idx]
            match_metric_value = inter / union
        elif match_metric == "IOS":
            # Find the smaller area of every prediction T in P with the prediction S
            smaller = torch.min(rem_areas, areas[idx])
            match_metric_value = inter / smaller
        else:
            raise ValueError("Invalid match_metric. Choose either 'IOU' or 'IOS'.")

        # Add the condition for Truncated NMS:
        # - Keep boxes with IoU below the truncation threshold (IOUt)
        # - Handle inside (IOIt) and outside (IOOt) intersection thresholds
        mask = (match_metric_value < IOUt)  # Keep boxes with IoU below threshold

        # Apply the truncated NMS conditions
        for i, m in enumerate(mask):
            # Condition for truncated NMS (condition 1 and 2 from original pseudocode)
            iou = match_metric_value[i]
            if iou > IOIt and iou <= IOOt:  # Inside box condition
                mask[i] = False  # Remove box if condition is met
            elif iou <= IOOt and iou > IOIt:  # Outside box condition
                mask[i] = True  # Keep box if condition is met

        # Filter out the boxes based on the updated mask
        order = order[mask]

    print("Total Valid prediction: ", len(keep))
    return keep

In [62]:
# Example usage
predictions = torch.tensor([
    [10, 10, 50, 50, 0.9, 1],
    [12, 12, 48, 48, 0.85, 1],
    [60, 60, 100, 100, 0.8, 2]
])

IOUt = 0.7
IOIt = 0.5
IOOt = 0.3
match_threshold = 0.5
match_metric= 'IOU'

result = truncated_nms_merge(predictions,match_metric, match_threshold, IOUt, IOIt, IOOt)
print("Filtered Boxes:", result)

Total Valid prediction:  2
Filtered Boxes: [0, 2]


In [2]:
# Example usage
predictions = torch.tensor([
    [10, 10, 50, 50, 0.9, 1],
    [12, 12, 48, 48, 0.85, 1],
    [60, 60, 100, 100, 0.8, 2]
])

IOUt = 0.7
IOIt = 0.5
IOOt = 0.3
match_threshold = 0.5
match_metric= 'IOU'

result = truncated_nms_merge(predictions, match_metric, IOUt, IOIt, IOOt)
print("Filtered Boxes:", result)

Total Valid prediction:  2
Filtered Boxes: [0, 2]


In [1]:
from typing import List, Dict

# --- Reuse DetectionBox and all filter/NMS functions from earlier ---

class DetectionBox:
    def __init__(self, x1, y1, x2, y2, score, label):
        self.x1 = x1; self.y1 = y1; self.x2 = x2; self.y2 = y2
        self.score = score; self.label = label

    def get_center(self):
        return ((self.x1 + self.x2)/2, (self.y1 + self.y2)/2)
    
    def get_area(self):
        return (self.x2 - self.x1) * (self.y2 - self.y1)
    
    def is_touching_edge(self, img_w, img_h, edge_thresh=10):
        return (
            abs(self.x1) <= edge_thresh or
            abs(self.y1) <= edge_thresh or
            abs(img_w - self.x2) <= edge_thresh or
            abs(img_h - self.y2) <= edge_thresh
        )

def contextual_filter(boxes: List[DetectionBox], img_h: int) -> List[bool]:
    return [
        not (b.label in ['car','bus','truck'] and b.get_center()[1] < img_h * 0.3)
        for b in boxes
    ]

def correct_misclassifications(boxes: List[DetectionBox], confusion_map: Dict[str, str]) -> List[DetectionBox]:
    return [
        DetectionBox(b.x1, b.y1, b.x2, b.y2, b.score, confusion_map.get(b.label, b.label))
        for b in boxes
    ]

def boundary_filter(boxes: List[DetectionBox], img_w: int, img_h: int) -> List[bool]:
    return [
        not b.is_touching_edge(img_w, img_h) or b.score >= 0.5
        for b in boxes
    ]

def final_score_adjustment(
    boxes: List[DetectionBox],
    region_mask: List[bool],
    boundary_mask: List[bool],
    min_score: float = 0.3
) -> List[DetectionBox]:
    adjusted = []
    for b, r_valid, e_valid in zip(boxes, region_mask, boundary_mask):
        score = b.score * (0.5 if not r_valid else 1.0) * (0.7 if not e_valid else 1.0)
        if score >= min_score:
            adjusted.append(DetectionBox(b.x1, b.y1, b.x2, b.y2, score, b.label))
    return adjusted

# Pure-Python IoU and NMS
def iou(box1: DetectionBox, box2: DetectionBox) -> float:
    ix1 = max(box1.x1, box2.x1); iy1 = max(box1.y1, box2.y1)
    ix2 = min(box1.x2, box2.x2); iy2 = min(box1.y2, box2.y2)
    iw = max(0, ix2 - ix1); ih = max(0, iy2 - iy1)
    inter = iw * ih
    if inter == 0: return 0.0
    union = box1.get_area() + box2.get_area() - inter
    return inter / union

def apply_nms(boxes: List[DetectionBox], iou_thresh: float = 0.5) -> List[DetectionBox]:
    final = []
    for cls in set(b.label for b in boxes):
        cls_boxes = [b for b in boxes if b.label == cls]
        cls_boxes.sort(key=lambda b: b.score, reverse=True)
        keep = []
        while cls_boxes:
            current = cls_boxes.pop(0)
            keep.append(current)
            cls_boxes = [b for b in cls_boxes if iou(current, b) < iou_thresh]
        final.extend(keep)
    return final

# --- Integrated Pipeline ---

def filter_and_nms(
    detections: List[DetectionBox],
    image_width: int,
    image_height: int,
    confusion_map: Dict[str, str],
    iou_threshold: float = 0.5,
    min_score: float = 0.3
) -> List[DetectionBox]:
    # Step 3: Contextual filtering
    region_mask = contextual_filter(detections, image_height)
    # Step 6: Label correction
    corrected = correct_misclassifications(detections, confusion_map)
    # Step 8: Boundary filtering
    boundary_mask = boundary_filter(corrected, image_width, image_height)
    # Step 9: Score adjustment & thresholding
    adjusted = final_score_adjustment(corrected, region_mask, boundary_mask, min_score)
    # NMS
    return apply_nms(adjusted, iou_threshold)


# ----- Example Usage -----
if __name__ == "__main__":
    img_w, img_h = 640, 480
    raw_preds = [
        DetectionBox(50,20,150,120,0.9,'car'),
        DetectionBox(30,430,100,470,0.6,'person'),
        DetectionBox(600,10,639,100,0.4,'bus'),
        DetectionBox(55,25,155,125,0.8,'van')
    ]
    confusion = {'van':'car','truck':'bus'}

    final_detections = filter_and_nms(
        raw_preds, img_w, img_h, confusion,
        iou_threshold=0.5, min_score=0.3
    )
    # Print results
    for det in final_detections:
        print(f"Label: {det.label}, Score: {det.score:.2f}, BBox: ({det.x1},{det.y1},{det.x2},{det.y2})")



Label: person, Score: 0.60, BBox: (30,430,100,470)
Label: car, Score: 0.80, BBox: (55,25,155,125)


In [3]:
from typing import List

# --- DetectionBox and Filters (without confusion map) ---

class DetectionBox:
    def __init__(self, x1, y1, x2, y2, score, label):
        self.x1 = x1; self.y1 = y1; self.x2 = x2; self.y2 = y2
        self.score = score; self.label = label

    def get_center(self):
        return ((self.x1 + self.x2) / 2, (self.y1 + self.y2) / 2)
    
    def get_area(self):
        return (self.x2 - self.x1) * (self.y2 - self.y1)
    
    def is_touching_edge(self, img_w, img_h, edge_thresh=10):
        return (
            abs(self.x1) <= edge_thresh or
            abs(self.y1) <= edge_thresh or
            abs(img_w - self.x2) <= edge_thresh or
            abs(img_h - self.y2) <= edge_thresh
        )

def contextual_filter(boxes: List[DetectionBox], img_h: int) -> List[bool]:
    return [
        not (b.label in ['car', 'bus', 'truck'] and b.get_center()[1] < img_h * 0.3)
        for b in boxes
    ]

def boundary_filter(boxes: List[DetectionBox], img_w: int, img_h: int) -> List[bool]:
    return [
        not b.is_touching_edge(img_w, img_h) or b.score >= 0.5
        for b in boxes
    ]

def final_score_adjustment(
    boxes: List[DetectionBox],
    region_mask: List[bool],
    boundary_mask: List[bool],
    min_score: float = 0.3
) -> List[DetectionBox]:
    adjusted = []
    for b, r_valid, e_valid in zip(boxes, region_mask, boundary_mask):
        score = b.score * (0.5 if not r_valid else 1.0) * (0.7 if not e_valid else 1.0)
        if score >= min_score:
            adjusted.append(DetectionBox(b.x1, b.y1, b.x2, b.y2, score, b.label))
    return adjusted

# Pure-Python IoU and NMS
def iou(box1: DetectionBox, box2: DetectionBox) -> float:
    ix1 = max(box1.x1, box2.x1); iy1 = max(box1.y1, box2.y1)
    ix2 = min(box1.x2, box2.x2); iy2 = min(box1.y2, box2.y2)
    iw = max(0, ix2 - ix1); ih = max(0, iy2 - iy1)
    inter = iw * ih
    if inter == 0: return 0.0
    union = box1.get_area() + box2.get_area() - inter
    return inter / union

def apply_nms(boxes: List[DetectionBox], iou_thresh: float = 0.5) -> List[DetectionBox]:
    final = []
    for cls in set(b.label for b in boxes):
        cls_boxes = [b for b in boxes if b.label == cls]
        cls_boxes.sort(key=lambda b: b.score, reverse=True)
        keep = []
        while cls_boxes:
            curr = cls_boxes.pop(0)
            keep.append(curr)
            cls_boxes = [b for b in cls_boxes if iou(curr, b) < iou_thresh]
        final.extend(keep)
    return final

# --- Integrated Pipeline (without confusion map) ---
def filter_and_nms(
    detections: List[DetectionBox],
    image_width: int,
    image_height: int,
    iou_threshold: float = 0.5,
    min_score: float = 0.3
) -> List[DetectionBox]:
    region_mask = contextual_filter(detections, image_height)
    boundary_mask = boundary_filter(detections, image_width, image_height)
    adjusted = final_score_adjustment(detections, region_mask, boundary_mask, min_score)
    return apply_nms(adjusted, iou_threshold)

# ----- Example Usage -----
if __name__ == "__main__":
    img_w, img_h = 640, 480
    raw_preds = [
        DetectionBox(50, 20, 150, 120, 0.9, 'car'),
        DetectionBox(30, 430, 100, 470, 0.6, 'person'),
        DetectionBox(600, 10, 639, 100, 0.4, 'bus'),
        DetectionBox(55, 25, 155, 125, 0.8, 'van')
    ]

    final_detections = filter_and_nms(
        raw_preds, img_w, img_h,
        iou_threshold=0.5, min_score=0.3
    )
    for d in final_detections:
        print(f"{d.label}: {d.score:.2f}, {d.x1},{d.y1},{d.x2},{d.y2}")


person: 0.60, 30,430,100,470
van: 0.80, 55,25,155,125
car: 0.45, 50,20,150,120
