In [1]:
# Import required functions and classes
from sahi import AutoDetectionModel
from sahi.utils.cv import read_image, read_image_as_pil
from sahi.utils.file import Path, increment_path, list_files, save_json, save_pickle, download_from_url
from sahi.predict import get_prediction, get_sliced_prediction, predict, agg_prediction, get_prediction_batched, get_sliced_prediction_batched, predict 
from sahi.prediction import visualize_object_predictions
from IPython.display import Image
from numpy import asarray
import cv2
import os
import json
import numpy as np
from tqdm import tqdm
import time
from sahi.prediction import ObjectPrediction, PredictionResult

In [2]:
# Download YOLOv8-S model to 'models/yolov8s.pt'
yolov8_model_path = 'models/yolov8/last.pt'
#download_yolov8s_model(destination_path=yolov8_model_path)

In [3]:
detection_model = AutoDetectionModel.from_pretrained(
    model_type='yolov8',
    model_path=yolov8_model_path,
    confidence_threshold=0.3,
    device="cuda:0", # or 'cpu'
)

In [4]:
import cv2
import time
from PIL import Image

def get_slice_parameters(object_density, slice_size):
    
    #start_time = time.time()
    #image_path = "test_data/0000006_06773_d_0000018.jpg"
    #image = Image.open(image_path).convert("RGB")
    #image_width, image_height  = image.size
    #print("Image Width:", image_width)
    #print("Image Height:", image_height)
    #min_dim = min(image_width, image_height)
    #slice_size = min_dim // 4 if min_dim > 1600 else min_dim // 2
    #print(f"Dimension calculation time taken: {(time.time() - start_time)*1000:.2f} ms")

    
    if object_density >= 50:
        #slice_size = min_dim // 4
        slice_width = slice_size
        slice_height = slice_size
        overlap_width_ratio = 0.5
        overlap_height_ratio = 0.5
    elif 25 <= object_density < 50:
        #slice_size = min_dim // 2
        slice_width = slice_size
        slice_height = slice_size
        overlap_width_ratio = 0.25
        overlap_height_ratio = 0.25
    elif 10 <= object_density < 25:
        #slice_size = min_dim // 2
        slice_width = slice_size
        slice_height = slice_size
        overlap_width_ratio = 0.15
        overlap_height_ratio = 0.15
    else:
        return

    return slice_width, slice_height, overlap_width_ratio, overlap_height_ratio

In [5]:
# Function to get image details by image_id
def get_image_id(coco_data, image_name):
    for image in coco_data["images"]:
        file_name = Path(image['file_name']).stem
        if file_name == image_name:
            return image['id']
    return None

In [8]:
# export visualization
def predict_sliced_images(input_folder, dataset_json_path, detection_model, slice_size):
    """
    Processes all image files in input_folder:
      - Runs predictions using get_prediction function and detection_model.
      - Saves annotated images with bounding boxes in output_folder.
      - Saves prediction details as JSON files in output_folder.
    
    Parameters:
      input_folder (str): Path to the folder containing images.
      detection_model: Your detection model used for prediction.
      slice_size (int): Parameter for slice size used in get_slice_parameters.
    """
    name = "exp"
    save_dir = Path(increment_path(Path("sliced_predictions") / name, exist_ok=False))
    os.makedirs(save_dir, exist_ok=True)

    if dataset_json_path:
        with open(dataset_json_path, "r") as file:
            data = json.load(file)
    
    # Visualization parameters
    visual_bbox_gt_thickness = 3
    visual_bbox_thickness = 2
    visual_text_size = 0.5
    visual_text_thickness = 1
    visual_hide_labels = False
    visual_hide_conf = False
    visual_export_format = 'png'
    
    sliced_predictions = []
    coco_json = []
    
    # Initialize a variable to accumulate total prediction time for all images.
    total_prediction_time = 0.0
    
    # Loop over files in the input folder
    for filename in os.listdir(input_folder):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            image_path = os.path.join(input_folder, filename)
            image_as_pil = read_image_as_pil(image_path)
            filename_without_ext = Path(filename).stem
            
            print("*****************************************")
            print("File Name", filename_without_ext)

            img_id = get_image_id(data, filename_without_ext)
            
            # Get initial predictions from your detection model
            time_start = time.time()
            prediction = get_prediction(image_path, detection_model)
            time_end = time.time() - time_start
            print("Initial Prediction time is: {:.2f} ms".format(time_end * 1000))
            
            # Add initial prediction time to the cumulative total.
            iteration_time = time_end
            
            object_density = len(prediction.object_prediction_list)
            print("Object Density:", object_density)
            
            if object_density > 10:
                slice_width, slice_height, overlap_width_ratio, overlap_height_ratio = get_slice_parameters(object_density, slice_size)
    
                print("********* Slice Parameters ***********")
                print("Slice Width: ", slice_width)
                print("Slice Height: ", slice_height)
                print("Overlap Width Ratio: ", overlap_width_ratio)
                print("Overlap Height Ratio: ", overlap_height_ratio)

                time_start_slice = time.time()
                result_sahi = get_sliced_prediction(
                    image_path,
                    detection_model,
                    slice_height=slice_height,
                    slice_width=slice_width,
                    overlap_height_ratio=overlap_height_ratio,
                    overlap_width_ratio=overlap_width_ratio,
                    postprocess_min_area=16,
                    postprocess_type="TruncatedNMS",
                    verbose=2
                )
                time_end_slice = time.time() - time_start_slice
                print("Sliced Prediction time is: {:.2f} ms".format(time_end_slice * 1000))
                
                # Add sliced prediction time to the current iteration's total.
                iteration_time += time_end_slice
                
                coco_prediction = result_sahi.to_coco_predictions(image_id=img_id)
                for idx, predict in enumerate(coco_prediction):
                    if coco_prediction[idx]["bbox"]:
                        coco_json.append(predict)
                    
                sliced_predictions.append(result_sahi)
                
                visualize_object_predictions(
                    np.ascontiguousarray(image_as_pil),
                    object_prediction_list=result_sahi.object_prediction_list,
                    rect_th=visual_bbox_thickness,
                    text_size=visual_text_size,
                    text_th=visual_text_thickness,
                    hide_labels=visual_hide_labels,
                    hide_conf=visual_hide_conf,
                    output_dir=save_dir,
                    file_name=filename_without_ext,
                    export_format=visual_export_format,
                )
                
            else:
                print("Prediction time is: {:.2f} ms".format(time_end * 1000))
                
                coco_prediction = prediction.to_coco_predictions(image_id=img_id)
                for idx, predict in enumerate(coco_prediction):
                    if coco_prediction[idx]["bbox"]:
                        coco_json.append(predict)
                    
                sliced_predictions.append(prediction)
                
                visualize_object_predictions(
                    np.ascontiguousarray(image_as_pil),
                    object_prediction_list=prediction.object_prediction_list,
                    rect_th=visual_bbox_thickness,
                    text_size=visual_text_size,
                    text_th=visual_text_thickness,
                    hide_labels=visual_hide_labels,
                    hide_conf=visual_hide_conf,
                    output_dir=save_dir,
                    file_name=filename_without_ext,
                    export_format=visual_export_format,
                )
            
            # Update the overall total prediction time
            total_prediction_time += iteration_time
                 
    if dataset_json_path:
        save_path = str(save_dir / "result.json")
        save_json(coco_json, save_path)
        print(f"Prediction results are successfully exported to {save_dir}")
    
    print(f"Prediction Completed Successfully: {len(sliced_predictions)} images")
    print("Total Prediction time for all images is: {:.2f} ms".format(total_prediction_time * 1000))
    return sliced_predictions


#### **Usage**

#### **Subset of 15 images**

In [12]:
# Example usage: Adaptive-Optimized-NMS (Adaptive-OptNMS-ALL) - min-area-1024
source_folder = './test_vis_data/images'
json_path = "./subset_vis_test_data_15.json"
slice_size = 512
result_preds_adapt_nms_opt_iou_all = predict_sliced_images(source_folder, json_path, detection_model, slice_size)

*****************************************
File Name 0000207_00300_d_0000004
Initial Prediction time is: 15.51 ms
Object Density: 25
********* Slice Parameters ***********
Slice Width:  512
Slice Height:  512
Overlap Width Ratio:  0.25
Overlap Height Ratio:  0.25
Sliced Boxes Count: 6
POST PROCESS:  TruncatedNMS
Performing prediction on 6 slices.
Original Prediction Count 213
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  213
Final Bounding Box Count (Truncated NMS):  63
Slicing performed in 0.0029900074005126953 seconds.
Prediction performed in 0.21706771850585938 seconds.
Sliced Prediction time is: 220.47 ms
*****************************************
File Name 0000074_07850_d_0000015
Initial Prediction time is: 37.92 ms
Object Density: 53
********* Slice Parameters ***********
Slice Width:  512
Slice Height:  512
Overlap Width Ratio:  0.5
Overlap Height Ratio:  0.5
Sliced Boxes Count: 28
POST PROCESS:  TruncatedNMS
Performing prediction on 28 slices.
Ori

In [13]:
#USING Adaptive-Optimized-NMS-IoU METHOD (Adaptive-TruncatedNMS-ALL)
!sahi coco evaluate --dataset_json_path './subset_vis_test_data_15.json' --result_json_path './sliced_predictions/exp202/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.55s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.204
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.330
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.229
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.300
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.472
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.240
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.171
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.332
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [16]:
result_predict_truncatednms_15_all = predict(source='./test_vis_data',
                         dataset_json_path = './subset_vis_test_data_15.json',
                         model_type = 'ultralytics',
                         model_path = 'models/yolov8/last.pt',
                         slice_height = 512,
                         slice_width = 512,
                         overlap_height_ratio = 0.25,
                         overlap_width_ratio = 0.25,
                         postprocess_type = "TruncatedNMS",
                         postprocess_min_area =  16,
                         postprocess_conf_threshold = 0.3,                  
                         verbose = 2
                        )

POST PROCESSING: TruncatedNMS
indexing coco dataset annotations...


Loading coco annotations: 100%|██████████| 15/15 [00:00<00:00, 296.98it/s]
Performing inference on images:   0%|          | 0/15 [00:00<?, ?it/s]

Image Name: 0000207_00300_d_0000004
Image Size:  (960, 540)
Sliced Boxes Count: 6
POST PROCESS:  TruncatedNMS
Performing prediction on 6 slices.
Original Prediction Count 265
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  213


Performing inference on images:   7%|▋         | 1/15 [00:00<00:04,  2.99it/s]

Final Bounding Box Count (Truncated NMS):  63
Prediction time is: 271.98 ms
Image Name: 0000074_07850_d_0000015
Image Size:  (1920, 1080)
Sliced Boxes Count: 15
POST PROCESS:  TruncatedNMS
Performing prediction on 15 slices.
Original Prediction Count 465
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  386


Performing inference on images:   7%|▋         | 1/15 [00:01<00:04,  2.99it/s]

Final Bounding Box Count (Truncated NMS):  142
Prediction time is: 683.30 ms


Performing inference on images:  13%|█▎        | 2/15 [00:01<00:08,  1.48it/s]

Image Name: 0000187_00444_d_0000190
Image Size:  (960, 540)
Sliced Boxes Count: 6
POST PROCESS:  TruncatedNMS
Performing prediction on 6 slices.
Original Prediction Count 217
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  196
Final Bounding Box Count (Truncated NMS):  40
Prediction time is: 170.49 ms


Performing inference on images:  20%|██        | 3/15 [00:01<00:05,  2.12it/s]

Image Name: 0000207_00600_d_0000007
Image Size:  (960, 540)
Sliced Boxes Count: 6
POST PROCESS:  TruncatedNMS
Performing prediction on 6 slices.
Original Prediction Count 419
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  343


Performing inference on images:  27%|██▋       | 4/15 [00:01<00:04,  2.23it/s]

Final Bounding Box Count (Truncated NMS):  93
Prediction time is: 342.43 ms
Image Name: 0000087_00299_d_0000002
Image Size:  (960, 540)
Sliced Boxes Count: 6
POST PROCESS:  TruncatedNMS
Performing prediction on 6 slices.
Original Prediction Count 480
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  395


Performing inference on images:  33%|███▎      | 5/15 [00:02<00:04,  2.17it/s]

Final Bounding Box Count (Truncated NMS):  105
Prediction time is: 413.64 ms
Image Name: 0000259_00500_d_0000002
Image Size:  (1360, 765)
Sliced Boxes Count: 8
POST PROCESS:  TruncatedNMS
Performing prediction on 8 slices.


Performing inference on images:  40%|████      | 6/15 [00:02<00:03,  2.50it/s]

Original Prediction Count 122
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  109
Final Bounding Box Count (Truncated NMS):  44
Prediction time is: 180.13 ms
Image Name: 0000074_08202_d_0000016
Image Size:  (1920, 1080)
Sliced Boxes Count: 15
POST PROCESS:  TruncatedNMS
Performing prediction on 15 slices.
Original Prediction Count 437
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  339


Performing inference on images:  40%|████      | 6/15 [00:03<00:03,  2.50it/s]

Final Bounding Box Count (Truncated NMS):  122
Prediction time is: 573.27 ms


Performing inference on images:  47%|████▋     | 7/15 [00:03<00:04,  1.90it/s]

Image Name: 0000011_04202_d_0000007
Image Size:  (1360, 765)
Sliced Boxes Count: 8
POST PROCESS:  TruncatedNMS
Performing prediction on 8 slices.
Original Prediction Count 114
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  87
Final Bounding Box Count (Truncated NMS):  30
Prediction time is: 163.13 ms


Performing inference on images:  53%|█████▎    | 8/15 [00:03<00:03,  2.25it/s]

Image Name: 0000189_00297_d_0000198
Image Size:  (960, 540)
Sliced Boxes Count: 6
POST PROCESS:  TruncatedNMS
Performing prediction on 6 slices.
Original Prediction Count 236
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  210
Final Bounding Box Count (Truncated NMS):  44
Prediction time is: 186.96 ms


Performing inference on images:  60%|██████    | 9/15 [00:04<00:02,  2.60it/s]

Image Name: 0000186_01387_d_0000188
Image Size:  (960, 540)
Sliced Boxes Count: 6
POST PROCESS:  TruncatedNMS
Performing prediction on 6 slices.
Original Prediction Count 207
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  183
Final Bounding Box Count (Truncated NMS):  46
Prediction time is: 173.91 ms


Performing inference on images:  67%|██████▋   | 10/15 [00:04<00:01,  2.95it/s]

Image Name: 0000087_01580_d_0000005
Image Size:  (960, 540)
Sliced Boxes Count: 6
POST PROCESS:  TruncatedNMS
Performing prediction on 6 slices.
Original Prediction Count 305
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  252


Performing inference on images:  73%|███████▎  | 11/15 [00:04<00:01,  3.06it/s]

Final Bounding Box Count (Truncated NMS):  66
Prediction time is: 238.40 ms
Image Name: 0000078_01314_d_0000004
Image Size:  (1360, 765)
Sliced Boxes Count: 8
POST PROCESS:  TruncatedNMS
Performing prediction on 8 slices.
Original Prediction Count 4
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  4
Final Bounding Box Count (Truncated NMS):  2
Prediction time is: 134.02 ms


Performing inference on images:  80%|████████  | 12/15 [00:04<00:00,  3.43it/s]

Image Name: 0000054_00786_d_0000001
Image Size:  (960, 540)
Sliced Boxes Count: 6
POST PROCESS:  TruncatedNMS
Performing prediction on 6 slices.
Original Prediction Count 432
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  365


Performing inference on images:  87%|████████▋ | 13/15 [00:05<00:00,  3.02it/s]

Final Bounding Box Count (Truncated NMS):  89
Prediction time is: 359.48 ms
Image Name: 0000192_00522_d_0000213
Image Size:  (960, 540)
Sliced Boxes Count: 6
POST PROCESS:  TruncatedNMS
Performing prediction on 6 slices.
Original Prediction Count 245
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  215


Performing inference on images:  93%|█████████▎| 14/15 [00:05<00:00,  3.26it/s]

Final Bounding Box Count (Truncated NMS):  44
Prediction time is: 188.36 ms
Image Name: 0000078_06777_d_0000020
Image Size:  (1360, 765)
Sliced Boxes Count: 8
POST PROCESS:  TruncatedNMS
Performing prediction on 8 slices.
Original Prediction Count 13
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  12
Final Bounding Box Count (Truncated NMS):  2


Performing inference on images: 100%|██████████| 15/15 [00:05<00:00,  2.67it/s]

Prediction time is: 135.18 ms
Prediction results are successfully exported to runs/predict/exp225
Model loaded in 0.051589012145996094 seconds.
Slicing performed in 0.014005899429321289 seconds.
Prediction performed in 4.214673280715942 seconds.
Exporting performed in 0.042687177658081055 seconds.





In [17]:
#USING Adaptive-Optimized-NMS-IoU METHOD (TruncatedNMS-ALL)
!sahi coco evaluate --dataset_json_path './subset_vis_test_data_15.json' --result_json_path './runs/predict/exp225/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.54s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.086
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.155
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.096
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.164
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.219
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.067
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.090
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.142
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [19]:
result_predict_truncatednms_10_all = predict(source='./test_visdrone_data',
                         dataset_json_path = './subset_visdrone_test_data_10.json',
                         model_type = 'ultralytics',
                         model_path = 'models/yolov8/last.pt',
                         slice_height = 512,
                         slice_width = 512,
                         overlap_height_ratio = 0.5,
                         overlap_width_ratio = 0.5,
                         postprocess_type = "TruncatedNMS",
                         postprocess_min_area =  16,
                         postprocess_conf_threshold = 0.3,                  
                         verbose = 2
                        )

POST PROCESSING: TruncatedNMS
indexing coco dataset annotations...


Loading coco annotations: 100%|██████████| 10/10 [00:00<00:00, 182.90it/s]
Performing inference on images:   0%|          | 0/10 [00:00<?, ?it/s]

Image Name: 0000074_03738_d_0000007
Image Size:  (1920, 1080)
Sliced Boxes Count: 28
POST PROCESS:  TruncatedNMS
Performing prediction on 28 slices.
Original Prediction Count 548
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  408


Performing inference on images:   0%|          | 0/10 [00:00<?, ?it/s]

Final Bounding Box Count (Truncated NMS):  96
Prediction time is: 784.66 ms


Performing inference on images:  10%|█         | 1/10 [00:01<00:08,  1.00it/s]

Image Name: 0000074_07850_d_0000015
Image Size:  (1920, 1080)
Sliced Boxes Count: 28
POST PROCESS:  TruncatedNMS
Performing prediction on 28 slices.
Original Prediction Count 1004
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  826


Performing inference on images:  10%|█         | 1/10 [00:02<00:08,  1.00it/s]

Final Bounding Box Count (Truncated NMS):  154
Prediction time is: 1441.15 ms


Performing inference on images:  20%|██        | 2/10 [00:02<00:11,  1.39s/it]

Image Name: 0000074_02723_d_0000005
Image Size:  (1920, 1080)
Sliced Boxes Count: 28
POST PROCESS:  TruncatedNMS
Performing prediction on 28 slices.
Original Prediction Count 638
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  484


Performing inference on images:  30%|███       | 3/10 [00:03<00:08,  1.22s/it]

Final Bounding Box Count (Truncated NMS):  101
Prediction time is: 810.70 ms


Performing inference on images:  30%|███       | 3/10 [00:03<00:08,  1.22s/it]

Image Name: 0000074_08202_d_0000016
Image Size:  (1920, 1080)
Sliced Boxes Count: 28
POST PROCESS:  TruncatedNMS
Performing prediction on 28 slices.
Original Prediction Count 869
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  672


Performing inference on images:  30%|███       | 3/10 [00:04<00:08,  1.22s/it]

Final Bounding Box Count (Truncated NMS):  133
Prediction time is: 1093.96 ms


Performing inference on images:  40%|████      | 4/10 [00:05<00:07,  1.26s/it]

Image Name: 0000073_05999_d_0000007
Image Size:  (1920, 1080)
Sliced Boxes Count: 28
POST PROCESS:  TruncatedNMS
Performing prediction on 28 slices.
Original Prediction Count 677
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  500


Performing inference on images:  50%|█████     | 5/10 [00:06<00:05,  1.19s/it]

Final Bounding Box Count (Truncated NMS):  113
Prediction time is: 859.31 ms


Performing inference on images:  50%|█████     | 5/10 [00:06<00:05,  1.19s/it]

Image Name: 0000074_05715_d_0000011
Image Size:  (1920, 1080)
Sliced Boxes Count: 28
POST PROCESS:  TruncatedNMS
Performing prediction on 28 slices.
Original Prediction Count 699
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  553


Performing inference on images:  60%|██████    | 6/10 [00:07<00:04,  1.16s/it]

Final Bounding Box Count (Truncated NMS):  111
Prediction time is: 896.19 ms


Performing inference on images:  60%|██████    | 6/10 [00:07<00:04,  1.16s/it]

Image Name: 0000074_07297_d_0000014
Image Size:  (1920, 1080)
Sliced Boxes Count: 28
POST PROCESS:  TruncatedNMS
Performing prediction on 28 slices.
Original Prediction Count 984
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  736


Performing inference on images:  60%|██████    | 6/10 [00:08<00:04,  1.16s/it]

Final Bounding Box Count (Truncated NMS):  148
Prediction time is: 1243.01 ms


Performing inference on images:  70%|███████   | 7/10 [00:08<00:03,  1.26s/it]

Image Name: 0000074_06746_d_0000013
Image Size:  (1920, 1080)
Sliced Boxes Count: 28
POST PROCESS:  TruncatedNMS
Performing prediction on 28 slices.
Original Prediction Count 893
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  679


Performing inference on images:  70%|███████   | 7/10 [00:09<00:03,  1.26s/it]

Final Bounding Box Count (Truncated NMS):  141
Prediction time is: 1142.27 ms


Performing inference on images:  80%|████████  | 8/10 [00:10<00:02,  1.29s/it]

Image Name: 0000074_01218_d_0000002
Image Size:  (1920, 1080)
Sliced Boxes Count: 28
POST PROCESS:  TruncatedNMS
Performing prediction on 28 slices.
Original Prediction Count 638
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  483


Performing inference on images:  90%|█████████ | 9/10 [00:11<00:01,  1.22s/it]

Final Bounding Box Count (Truncated NMS):  120
Prediction time is: 850.84 ms
Image Name: 0000074_08777_d_0000017
Image Size:  (1920, 1080)
Sliced Boxes Count: 28
POST PROCESS:  TruncatedNMS
Performing prediction on 28 slices.


Performing inference on images:  90%|█████████ | 9/10 [00:11<00:01,  1.22s/it]

Original Prediction Count 174
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  131
Final Bounding Box Count (Truncated NMS):  48
Prediction time is: 497.13 ms


Performing inference on images: 100%|██████████| 10/10 [00:11<00:00,  1.17s/it]

Prediction results are successfully exported to runs/predict/exp227
Model loaded in 0.3035259246826172 seconds.
Slicing performed in 0.02210831642150879 seconds.
Prediction performed in 9.61922287940979 seconds.
Exporting performed in 0.07789754867553711 seconds.





In [20]:
#USING Adaptive-Optimized-NMS-IoU METHOD (TruncatedNMS-ALL)
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_data_10.json' --result_json_path './runs/predict/exp227/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.90s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.022
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.049
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.020
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.145
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.049
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.010
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.077
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.019
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [21]:
# Example usage: Adaptive-Optimized-NMS (TruncatedNMS-ALL) - min-area-16
source_folder = './test_visdrone_data/images'
json_path = "./subset_visdrone_test_data_10.json"
slice_size = 512
result_preds_adapt_nms_opt_iou_all = predict_sliced_images(source_folder, json_path, detection_model, slice_size)

*****************************************
File Name 0000074_03738_d_0000007
Initial Prediction time is: 41.28 ms
Object Density: 52
********* Slice Parameters ***********
Slice Width:  512
Slice Height:  512
Overlap Width Ratio:  0.5
Overlap Height Ratio:  0.5
Sliced Boxes Count: 28
POST PROCESS:  TruncatedNMS
Performing prediction on 28 slices.
Original Prediction Count 408
Confidence Scores:  0.3
Min area Threshold:  16
Adaptive Filtered Prediction:  408
Final Bounding Box Count (Truncated NMS):  96
Slicing performed in 0.01041102409362793 seconds.
Prediction performed in 0.7468652725219727 seconds.
Sliced Prediction time is: 755.80 ms
*****************************************
File Name 0000074_07850_d_0000015
Initial Prediction time is: 36.36 ms
Object Density: 53
********* Slice Parameters ***********
Slice Width:  512
Slice Height:  512
Overlap Width Ratio:  0.5
Overlap Height Ratio:  0.5
Sliced Boxes Count: 28
POST PROCESS:  TruncatedNMS
Performing prediction on 28 slices.
Origin

In [22]:
#USING Adaptive-Optimized-NMS-IoU METHOD
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_data_10.json' --result_json_path './sliced_predictions/exp203/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.86s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.147
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.208
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.155
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.183
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.118
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.672
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.093
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.058
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

#### **Take 1**

In [14]:
# Example usage: Adaptive-Optimized-NMS
source_folder = './single_test/images'
json_path = "./subset_vis_test_data_428.json"
slice_size = 512
result_preds_adapt_opt_nms_iou_size = predict_sliced_images(source_folder, json_path, detection_model, slice_size)

*****************************************
File Name 9999952_00000_d_0000029
Intial Prediction time is: 22.68 ms
Object Density: 9
Prediction time is: 22.68 ms
Prediction results are successfully exported to sliced_predictions/exp145
Prediction Completed Sucessfully: 1 images


In [15]:
#USING Adaptive-Optimized-NMS-IoU METHOD
!sahi coco evaluate --dataset_json_path './subset_vis_test_data_428.json' --result_json_path './sliced_predictions/exp145/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.00s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.034
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.076
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.033
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.023
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.500
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.005
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.283
 Average Precision  (AP) @[ IoU=0.50:0.95 | 

In [18]:
# Example usage: Adaptive-Optimized-NMS
source_folder = './test_data/images'
json_path = "./subset_visdrone_test_data.json"
slice_size = 512
result_preds_adapt_opt_nms_iou_size = predict_sliced_images(source_folder, json_path, detection_model, slice_size)

*****************************************
File Name 9999938_00000_d_0000207
Intial Prediction time is: 29.11 ms
Object Density: 24
********* Slice Parameters ***********
Slice Width:  512
Slice Height:  512
Overlap Width Ratio:  0.15
Overlap Height Ratio:  0.15
POST PROCESS:  TruncatedNMS
Performing prediction on 8 slices.
Original Prediction Count 341
Adaptive Prediction Count 341
Total Valid prediction:  187
Slicing performed in 0.008407115936279297 seconds.
Prediction performed in 0.7021129131317139 seconds.
Sliced Prediction time is: 708.08 ms
*****************************************
File Name 0000006_05208_d_0000014
Intial Prediction time is: 23.04 ms
Object Density: 17
********* Slice Parameters ***********
Slice Width:  512
Slice Height:  512
Overlap Width Ratio:  0.15
Overlap Height Ratio:  0.15
POST PROCESS:  TruncatedNMS
Performing prediction on 6 slices.
Original Prediction Count 44
Adaptive Prediction Count 44
Total Valid prediction:  9
Slicing performed in 0.0057964324951

In [19]:
#USING Adaptive-Optimized-NMS-IoU METHOD
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_data.json' --result_json_path './sliced_predictions/exp146/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.40s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.251
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.355
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.283
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.314
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.668
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.297
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.225
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.504
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [21]:
# Example usage: Adaptive-Optimized-NMS (OptNMS)
source_folder = './test_data/images'
json_path = "./subset_visdrone_test_data.json"
slice_size = 512
result_preds_adapt_opt_nms_iou_size = predict_sliced_images(source_folder, json_path, detection_model, slice_size)

*****************************************
File Name 9999938_00000_d_0000207
Intial Prediction time is: 29.81 ms
Object Density: 24
********* Slice Parameters ***********
Slice Width:  512
Slice Height:  512
Overlap Width Ratio:  0.15
Overlap Height Ratio:  0.15
POST PROCESS:  OptNMS
Performing prediction on 8 slices.
Original Prediction Count 341
Filtered Prediction:  198
Final Bounding Box Count: 108
Filtered Prediction:  81
Final Bounding Box Count: 59
Filtered Prediction:  17
Final Bounding Box Count: 10
Filtered Prediction:  3
Final Bounding Box Count: 2
Filtered Prediction:  1
Final Bounding Box Count: 1
Filtered Prediction:  21
Final Bounding Box Count: 9
Filtered Prediction:  20
Final Bounding Box Count: 11
Slicing performed in 0.007295370101928711 seconds.
Prediction performed in 0.18048357963562012 seconds.
Sliced Prediction time is: 186.29 ms
*****************************************
File Name 0000006_05208_d_0000014
Intial Prediction time is: 23.24 ms
Object Density: 17
****

In [22]:
#USING Adaptive-Optimized-NMS-IoU METHOD
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_data.json' --result_json_path './sliced_predictions/exp147/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.41s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.255
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.361
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.291
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.323
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.654
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.350
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.231
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.488
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [24]:
result_predict_nms_2 = predict(source='./test_data',
                         dataset_json_path = './subset_visdrone_test_data.json',
                         model_type = 'ultralytics',
                         model_path = 'models/yolov8/last.pt',
                         slice_height = 512,
                         slice_width = 512,
                         overlap_height_ratio = 0.5,
                         overlap_width_ratio = 0.5,
                         postprocess_type = "NMS",
                         verbose = 2
                        )

POST PROCESSING: NMS
indexing coco dataset annotations...


Loading coco annotations: 100%|██████████| 5/5 [00:00<00:00, 160.87it/s]
Performing inference on images:   0%|          | 0/5 [00:00<?, ?it/s]

Image Name: 9999938_00000_d_0000207
Image Size:  (1400, 788)
POST PROCESS:  NMS
Performing prediction on 15 slices.


Performing inference on images:  20%|██        | 1/5 [00:00<00:02,  1.83it/s]

Total Valid prediction:  153
Total Valid prediction:  1
Total Valid prediction:  62
Total Valid prediction:  15
Total Valid prediction:  3
Total Valid prediction:  1
Total Valid prediction:  9
Total Valid prediction:  22
Prediction time is: 352.77 ms


Performing inference on images:  20%|██        | 1/5 [00:00<00:02,  1.83it/s]

Image Name: 0000006_05208_d_0000014
Image Size:  (1360, 765)
POST PROCESS:  NMS
Performing prediction on 10 slices.
Total Valid prediction:  1
Total Valid prediction:  2
Total Valid prediction:  8
Prediction time is: 171.05 ms


Performing inference on images:  40%|████      | 2/5 [00:00<00:01,  2.61it/s]

Image Name: 0000370_02000_d_0000254
Image Size:  (1360, 765)
POST PROCESS:  NMS
Performing prediction on 10 slices.
Total Valid prediction:  4
Total Valid prediction:  1
Prediction time is: 165.21 ms


Performing inference on images:  60%|██████    | 3/5 [00:01<00:00,  3.10it/s]

Image Name: 0000006_06773_d_0000018
Image Size:  (1360, 765)
POST PROCESS:  NMS
Performing prediction on 10 slices.
Total Valid prediction:  1
Total Valid prediction:  21
Total Valid prediction:  8
Total Valid prediction:  13
Total Valid prediction:  3
Total Valid prediction:  2
Prediction time is: 176.44 ms


Performing inference on images:  80%|████████  | 4/5 [00:01<00:00,  3.24it/s]

Image Name: 0000006_05999_d_0000017
Image Size:  (1360, 765)
POST PROCESS:  NMS
Performing prediction on 10 slices.
Total Valid prediction:  22
Total Valid prediction:  9
Total Valid prediction:  40
Total Valid prediction:  22
Total Valid prediction:  1
Prediction time is: 183.31 ms


Performing inference on images: 100%|██████████| 5/5 [00:01<00:00,  3.03it/s]

Prediction results are successfully exported to runs/predict/exp177
Model loaded in 0.0471343994140625 seconds.
Slicing performed in 0.0055065155029296875 seconds.
Prediction performed in 1.0487713813781738 seconds.
Exporting performed in 0.04857182502746582 seconds.





In [25]:
#USING Adaptive-Optimized-NMS-IoU METHOD
!sahi coco evaluate --dataset_json_path './subset_visdrone_test_data.json' --result_json_path './runs/predict/exp177/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.57s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.252
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.368
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.285
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.332
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.701
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = 0.360
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.245
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.515
 Average Precision  (AP) @[ IoU=0.50:0.95 | a

In [16]:
result_predict_nms_1 = predict(source='./single_test',
                         dataset_json_path = './subset_vis_test_data_428.json',
                         model_type = 'ultralytics',
                         model_path = 'models/yolov8/last.pt',
                         slice_height = 512,
                         slice_width = 512,
                         overlap_height_ratio = 0.5,
                         overlap_width_ratio = 0.5,
                         postprocess_type = "NMS",
                         verbose = 2
                        )

POST PROCESSING: NMS
indexing coco dataset annotations...


Loading coco annotations: 100%|██████████| 1/1 [00:00<00:00, 526.06it/s]
Performing inference on images:   0%|          | 0/1 [00:00<?, ?it/s]

Image Name: 9999952_00000_d_0000029
Image Size:  (1400, 788)
POST PROCESS:  NMS
Performing prediction on 15 slices.


Performing inference on images: 100%|██████████| 1/1 [00:00<00:00,  2.38it/s]

Total Valid prediction:  17
Total Valid prediction:  1
Prediction time is: 327.30 ms
Prediction results are successfully exported to runs/predict/exp175
Model loaded in 0.07476592063903809 seconds.
Slicing performed in 0.0011415481567382812 seconds.
Prediction performed in 0.3273036479949951 seconds.
Exporting performed in 0.04234790802001953 seconds.





In [17]:
#USING Adaptive-Optimized-NMS-IoU METHOD
!sahi coco evaluate --dataset_json_path './subset_vis_test_data_428.json' --result_json_path './runs/predict/exp175/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.00s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.102
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.207
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.125
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.186
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.458
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.092
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.247
 Average Precision  (AP) @[ IoU=0.50:0.95 | 

##### **Truncated NMS test**

In [10]:
# Example usage: Adaptive-Optimized-NMS (Adaptive-TruncatedNMS)
source_folder = './single_test/images'
json_path = "./subset_vis_test_data_1162.json"
slice_size = 512
result_preds_adapt_opt_nms_iou_size = predict_sliced_images(source_folder, json_path, detection_model, slice_size)

*****************************************
File Name 0000011_05068_d_0000008
Intial Prediction time is: 23.70 ms
Object Density: 33
********* Slice Parameters ***********
Slice Width:  512
Slice Height:  512
Overlap Width Ratio:  0.25
Overlap Height Ratio:  0.25
POST PROCESS:  TruncatedNMS
Performing prediction on 8 slices.
Original Prediction Count 93
Adaptive Filtered Prediction:  93
Final Bounding Box Count (Truncated NMS):  47
Slicing performed in 0.00583648681640625 seconds.
Prediction performed in 0.1876201629638672 seconds.
Sliced Prediction time is: 193.40 ms
Prediction results are successfully exported to sliced_predictions/exp152
Prediction Completed Sucessfully: 1 images
Total Prediction time is: 217.10 ms


In [11]:
#USING Adaptive-Optimized-NMS-IoU METHOD (Adaptive-TruncatedNMS)
!sahi coco evaluate --dataset_json_path './subset_vis_test_data_1162.json' --result_json_path './sliced_predictions/exp152/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.076
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.136
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.068
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.080
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.429
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.038
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.265
 Average Precision  (AP) @[ IoU=0.50:0.95 | 

In [27]:
# Example usage: Adaptive-Optimized-NMS (OptNMS)
source_folder = './single_test/images'
json_path = "./subset_vis_test_data_1162.json"
slice_size = 512
result_preds_adapt_opt_nms_iou_size = predict_sliced_images(source_folder, json_path, detection_model, slice_size)

*****************************************
File Name 0000011_05068_d_0000008
Intial Prediction time is: 26.26 ms
Object Density: 33
********* Slice Parameters ***********
Slice Width:  512
Slice Height:  512
Overlap Width Ratio:  0.25
Overlap Height Ratio:  0.25
POST PROCESS:  OptNMS
Performing prediction on 8 slices.
Original Prediction Count 93
Filtered Prediction:  63
Final Bounding Box Count: 31
Filtered Prediction:  8
Final Bounding Box Count: 6
Filtered Prediction:  3
Final Bounding Box Count: 2
Filtered Prediction:  1
Final Bounding Box Count: 1
Filtered Prediction:  11
Final Bounding Box Count: 6
Filtered Prediction:  1
Final Bounding Box Count: 1
Filtered Prediction:  6
Final Bounding Box Count: 6
Slicing performed in 0.007169246673583984 seconds.
Prediction performed in 0.16434741020202637 seconds.
Sliced Prediction time is: 169.24 ms
Prediction results are successfully exported to sliced_predictions/exp148
Prediction Completed Sucessfully: 1 images
Total Prediction time is: 1

In [28]:
#USING Adaptive-Optimized-NMS-IoU METHOD (OptNMS)
!sahi coco evaluate --dataset_json_path './subset_vis_test_data_1162.json' --result_json_path './sliced_predictions/exp148/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.082
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.149
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.068
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.096
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.429
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.046
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.265
 Average Precision  (AP) @[ IoU=0.50:0.95 | 

In [30]:
# Example usage: Adaptive-Optimized-NMS (TruncatedNMS)
source_folder = './single_test/images'
json_path = "./subset_vis_test_data_1162.json"
slice_size = 512
result_preds_adapt_opt_nms_iou_size = predict_sliced_images(source_folder, json_path, detection_model, slice_size)

*****************************************
File Name 0000011_05068_d_0000008
Intial Prediction time is: 28.54 ms
Object Density: 33
********* Slice Parameters ***********
Slice Width:  512
Slice Height:  512
Overlap Width Ratio:  0.25
Overlap Height Ratio:  0.25
POST PROCESS:  TruncatedNMS
Performing prediction on 8 slices.
Original Prediction Count 93
Adaptive Prediction Count 93
Total Valid prediction:  46
Slicing performed in 0.005643367767333984 seconds.
Prediction performed in 0.1970047950744629 seconds.
Sliced Prediction time is: 202.37 ms
Prediction results are successfully exported to sliced_predictions/exp149
Prediction Completed Sucessfully: 1 images
Total Prediction time is: 230.91 ms


In [31]:
#USING Adaptive-Optimized-NMS-IoU METHOD (TruncatedNMS)
!sahi coco evaluate --dataset_json_path './subset_vis_test_data_1162.json' --result_json_path './sliced_predictions/exp149/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.076
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.137
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.069
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.080
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.429
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.038
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.265
 Average Precision  (AP) @[ IoU=0.50:0.95 | 

In [32]:
result_predict_nms_2 = predict(source='./single_test',
                         dataset_json_path = './subset_vis_test_data_1162.json',
                         model_type = 'ultralytics',
                         model_path = 'models/yolov8/last.pt',
                         slice_height = 512,
                         slice_width = 512,
                         overlap_height_ratio = 0.5,
                         overlap_width_ratio = 0.5,
                         postprocess_type = "NMS",
                         verbose = 2
                        )

POST PROCESSING: NMS
indexing coco dataset annotations...


Loading coco annotations: 100%|██████████| 1/1 [00:00<00:00, 450.23it/s]
Performing inference on images:   0%|          | 0/1 [00:00<?, ?it/s]

Image Name: 0000011_05068_d_0000008
Image Size:  (1360, 765)
POST PROCESS:  NMS
Performing prediction on 10 slices.


Performing inference on images: 100%|██████████| 1/1 [00:00<00:00,  2.86it/s]

Total Valid prediction:  34
Total Valid prediction:  9
Total Valid prediction:  3
Total Valid prediction:  3
Total Valid prediction:  7
Total Valid prediction:  2
Total Valid prediction:  10
Prediction time is: 241.23 ms
Prediction results are successfully exported to runs/predict/exp178
Model loaded in 0.036215782165527344 seconds.
Slicing performed in 0.0012192726135253906 seconds.
Prediction performed in 0.24123263359069824 seconds.
Exporting performed in 0.04652714729309082 seconds.





In [33]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS)
!sahi coco evaluate --dataset_json_path './subset_vis_test_data_1162.json' --result_json_path './runs/predict/exp178/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.083
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.143
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.084
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.093
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.429
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.051
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.252
 Average Precision  (AP) @[ IoU=0.50:0.95 | 

**Take 2**

In [16]:
# Example usage: Adaptive-Optimized-NMS (OptNMS)
source_folder = './single_test/images'
json_path = "./subset_vis_test_data_1162.json"
slice_size = 512
result_preds_adapt_opt_nms_iou_size_2 = predict_sliced_images(source_folder, json_path, detection_model, slice_size)

*****************************************
File Name 0000011_05068_d_0000008
Intial Prediction time is: 25.83 ms
Object Density: 33
********* Slice Parameters ***********
Slice Width:  512
Slice Height:  512
Overlap Width Ratio:  0.25
Overlap Height Ratio:  0.25
POST PROCESS:  OptNMS
Performing prediction on 8 slices.
Original Prediction Count 93
Adaptive Filtered Prediction:  63
Final Bounding Box Count (OptNMS): 31
Adaptive Filtered Prediction:  8
Final Bounding Box Count (OptNMS): 6
Adaptive Filtered Prediction:  3
Final Bounding Box Count (OptNMS): 2
Adaptive Filtered Prediction:  1
Final Bounding Box Count (OptNMS): 1
Adaptive Filtered Prediction:  11
Final Bounding Box Count (OptNMS): 6
Adaptive Filtered Prediction:  1
Final Bounding Box Count (OptNMS): 1
Adaptive Filtered Prediction:  6
Final Bounding Box Count (OptNMS): 6
Slicing performed in 0.005602598190307617 seconds.
Prediction performed in 0.1577591896057129 seconds.
Sliced Prediction time is: 162.79 ms
Prediction results 

In [17]:
#USING Adaptive-Optimized-NMS-IoU METHOD (OptNMS)
!sahi coco evaluate --dataset_json_path './subset_vis_test_data_1162.json' --result_json_path './sliced_predictions/exp154/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.082
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.149
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.068
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.096
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.429
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.046
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.265
 Average Precision  (AP) @[ IoU=0.50:0.95 | 

In [18]:
result_predict_nms_3 = predict(source='./single_test',
                         dataset_json_path = './subset_vis_test_data_1162.json',
                         model_type = 'ultralytics',
                         model_path = 'models/yolov8/last.pt',
                         slice_height = 512,
                         slice_width = 512,
                         overlap_height_ratio = 0.5,
                         overlap_width_ratio = 0.5,
                         postprocess_type = "NMS",
                         verbose = 2
                        )

POST PROCESSING: NMS
indexing coco dataset annotations...


Loading coco annotations: 100%|██████████| 1/1 [00:00<00:00, 460.10it/s]
Performing inference on images:   0%|          | 0/1 [00:00<?, ?it/s]

Image Name: 0000011_05068_d_0000008
Image Size:  (1360, 765)
POST PROCESS:  NMS
Performing prediction on 10 slices.


Performing inference on images: 100%|██████████| 1/1 [00:00<00:00,  2.91it/s]

Final Bounding Box Count (NMS):  34
Final Bounding Box Count (NMS):  9
Final Bounding Box Count (NMS):  3
Final Bounding Box Count (NMS):  3
Final Bounding Box Count (NMS):  7
Final Bounding Box Count (NMS):  2
Final Bounding Box Count (NMS):  10
Prediction time is: 236.71 ms
Prediction results are successfully exported to runs/predict/exp179
Model loaded in 0.05026555061340332 seconds.
Slicing performed in 0.0010750293731689453 seconds.
Prediction performed in 0.2367095947265625 seconds.
Exporting performed in 0.04619407653808594 seconds.





In [19]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS)
!sahi coco evaluate --dataset_json_path './subset_vis_test_data_1162.json' --result_json_path './runs/predict/exp179/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.083
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.143
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.084
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.093
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.429
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.051
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.252
 Average Precision  (AP) @[ IoU=0.50:0.95 | 

In [28]:
import numpy as np
from tqdm import tqdm
from sahi.utils.cv import read_image_as_pil, visualize_object_predictions
# export visualizations with ground truths
output_dir = './runs/predict/exp_test'
image = './single_test/images/0000011_05068_d_0000008.jpg'
image_as_pil = read_image_as_pil(image)
color = (0, 255, 0)  # original annotations in green
visual_bbox_thickness = 2
visual_text_size = 12
visual_text_thickness = 10
visual_hide_labels = 1
visual_hide_conf = 1
file_name = 'result_gt'
filename_prediction = 'prediction_visual_with_gt'
visual_export_format = 'png'

result = visualize_object_predictions(
    np.ascontiguousarray(image_as_pil),
    object_prediction_list=object_prediction_gt_list, #result2.object_prediction_list,
    rect_th=visual_bbox_thickness,
    text_size=visual_text_size,
    text_th=visual_text_thickness,
    color=color,
    hide_labels=visual_hide_labels,
    hide_conf=visual_hide_conf,
    output_dir=output_dir,
    file_name=file_name,
    export_format= visual_export_format,
)

Image(f'/content/drive/MyDrive/sahi/runs/predict/exp_test/result_gt.png')

TypeError: 'module' object is not callable

In [20]:
ls ./single_test/images/0000011_05068_d_0000008.jpg

0000011_05068_d_0000008.jpg


In [None]:
import numpy as np
from tqdm import tqdm
from sahi.utils.cv import read_image_as_pil, visualize_object_predictions
from IPython.display import Image

# export visualizations with ground truths
output_dir = 'runs/predict/exp_test'
image = 'test_data/0000006_06773_d_0000018.jpg'
image_as_pil = read_image_as_pil(image)
color = (0, 255, 0)  # original annotations in green
visual_bbox_gt_thickness = 3
visual_bbox_thickness = 2
visual_text_size = 12
visual_text_thickness = 10
visual_hide_labels = 1
visual_hide_conf = 1
file_name = 'result_gt'
filename_prediction = 'prediction_visual_with_gt'
visual_export_format = 'png'

result = visualize_object_predictions(
    np.ascontiguousarray(image_as_pil),
    object_prediction_list= result2.object_prediction_list,
    rect_th=visual_bbox_gt_thickness,
    text_size=visual_text_size,
    text_th=visual_text_thickness,
    color=color,
    hide_labels=visual_hide_labels,
    hide_conf=visual_hide_conf,
    output_dir=None,
    file_name=None,
    export_format= visual_export_format,
)
color = (255, 0, 0)  # model predictions in red
_ = visualize_object_predictions(
    result["image"],
    object_prediction_list=result_sahi.object_prediction_list,
    rect_th=visual_bbox_thickness,
    text_size=visual_text_size,
    text_th=visual_text_thickness,
    color=color,
    hide_labels=visual_hide_labels,
    hide_conf=visual_hide_conf,
    output_dir=output_dir,
    file_name=filename_prediction,
    export_format=visual_export_format,
)

Image(f'runs/predict/exp_test/prediction_visual_with_gt.png')

In [22]:
import json
import cv2
import matplotlib.pyplot as plt

# Load the JSON annotation file
json_path = "../data/VisDrone2COCO/COCO/annotations/visdrone_coco_test.json"
with open(json_path, "r") as file:
    data = json.load(file)

image_id = 1162 # Change this to your target image_id

def get_annotations(image_id):
    return [anno for anno in data["annotations"] if anno["image_id"] == image_id]

annotations = get_annotations(image_id)


In [33]:
result_predict = predict(source='./single_test',
                         dataset_json_path = './subset_vis_test_data_1162.json',
                         model_type = 'ultralytics',
                         model_path = 'models/yolov8/last.pt',
                         slice_height = 512,
                         slice_width = 512,
                         overlap_height_ratio = 0.5,
                         overlap_width_ratio = 0.5,
                         postprocess_type = "OptNMS",
                         verbose = 2,
                         batch_size = 4
                         #model_config_path = config.yaml
                        )

POST PROCESSING: OptNMS
indexing coco dataset annotations...


Loading coco annotations: 100%|██████████| 1/1 [00:00<00:00, 495.20it/s]
Performing inference on images:   0%|          | 0/1 [00:00<?, ?it/s]

Image Name: 0000011_05068_d_0000008
Image Size:  (1360, 765)
POST PROCESS:  OptNMS
Performing prediction on 10 slices.


Performing inference on images: 100%|██████████| 1/1 [00:00<00:00,  2.93it/s]

Original Prediction Count 148
Adaptive Filtered Prediction:  68
Final Bounding Box Count (OptNMS): 32
Adaptive Filtered Prediction:  7
Final Bounding Box Count (OptNMS): 5
Adaptive Filtered Prediction:  5
Final Bounding Box Count (OptNMS): 3
Adaptive Filtered Prediction:  2
Final Bounding Box Count (OptNMS): 2
Adaptive Filtered Prediction:  13
Final Bounding Box Count (OptNMS): 6
Adaptive Filtered Prediction:  2
Final Bounding Box Count (OptNMS): 1
Adaptive Filtered Prediction:  7
Final Bounding Box Count (OptNMS): 7
Prediction time is: 232.94 ms
Prediction results are successfully exported to runs/predict/exp180
Model loaded in 0.30643177032470703 seconds.
Slicing performed in 0.0011594295501708984 seconds.
Prediction performed in 0.23293519020080566 seconds.
Exporting performed in 0.04785895347595215 seconds.





In [34]:
#USING Adaptive-Optimized-NMS-IoU METHOD (NMS)
!sahi coco evaluate --dataset_json_path './subset_vis_test_data_1162.json' --result_json_path './runs/predict/exp180/result.json'

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!

Evaluating bbox...
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.01s).
Accumulating evaluation results...
DONE (t=0.00s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=500 ] = 0.070
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=500 ] = 0.121
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=500 ] = 0.084
 Average Precision  (AP) @[ IoU=0.50      | area= small | maxDets=500 ] = 0.069
 Average Precision  (AP) @[ IoU=0.50      | area=medium | maxDets=500 ] = 0.483
 Average Precision  (AP) @[ IoU=0.50      | area= large | maxDets=500 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=500 ] = 0.040
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=500 ] = 0.255
 Average Precision  (AP) @[ IoU=0.50:0.95 | 