In [1]:
#Import required functions and classes
from sahi import AutoDetectionModel
from sahi.utils.cv import read_image, read_image_as_pil
from sahi.utils.file import Path, increment_path, list_files, save_json, save_pickle, download_from_url
from sahi.predict import get_prediction, get_sliced_prediction, predict, agg_prediction, predict 
from sahi.prediction import visualize_object_predictions
from sahi.postprocess.utils import ObjectPredictionList, has_match, merge_object_prediction_pair
from IPython.display import Image
from numpy import asarray
import cv2
import os
import time
from PIL import Image
import json
import numpy as np
from tqdm import tqdm
import time
from sahi.prediction import ObjectPrediction, PredictionResult
from pathlib import Path
from multiprocessing import Pool, cpu_count
import torch
from torchvision.ops import nms, clip_boxes_to_image

In [7]:
yolov8_model_path = '../models/yolov8/last.pt'

In [8]:
detection_model = AutoDetectionModel.from_pretrained(
    model_type='ultralytics',
    model_path=yolov8_model_path,
    confidence_threshold=0.3,
    device="cuda:0", # or 'cpu'
)

In [4]:
def get_slice_parameters(object_density, slice_size):
    
    if object_density >= 50:
        #slice_size = min_dim // 4
        slice_width = slice_size
        slice_height = slice_size
        overlap_width_ratio = 0.5
        overlap_height_ratio = 0.5
    elif 25 <= object_density < 50:
        #slice_size = min_dim // 2
        slice_width = slice_size
        slice_height = slice_size
        overlap_width_ratio = 0.25
        overlap_height_ratio = 0.25
    elif 10 <= object_density < 25:
        #slice_size = min_dim // 2
        slice_width = slice_size
        slice_height = slice_size
        overlap_width_ratio = 0.15
        overlap_height_ratio = 0.15
    else:
        return

    return slice_width, slice_height, overlap_width_ratio, overlap_height_ratio

In [5]:
def merged_preds_to_coco(preds, image_id):
    coco_predictions = []
    for pred in preds:
        try:
            coco = pred.to_coco_prediction(image_id=image_id)
            coco_dict = {
                "image_id": coco.image_id,
                "bbox": coco.bbox,
                "score": coco.score,
                "category_id": coco.category_id,
                "segmentation": coco.segmentation,
                "iscrowd": coco.iscrowd,
                "area": coco.area,
            }
            # Optionally include category_name (not used in COCO eval)
            # coco_dict["category_name"] = coco.category_name

            # Only add if bbox is valid (non-empty)
            if coco_dict["bbox"]:
                coco_predictions.append(coco_dict)
        except Exception as e:
            print(f"Failed to convert to COCO dict: {e}")
    return coco_predictions

In [6]:
def nms_merge(predictions, iou_threshold=0.5):
    if not predictions:
        return []
    boxes = torch.tensor([p.bbox.to_xyxy() for p in predictions])
    scores = torch.tensor([p.score.value for p in predictions])
    keep_indices = nms(boxes, scores, iou_threshold).tolist()
    return [predictions[i] for i in keep_indices]

In [9]:
# Apply Non-Maximum Suppression (NMS)
def apply_nms(detections, iou_threshold=0.5):
    boxes, scores, labels = [], [], []
    for det in detections:
        boxes.append(det['box'])
        scores.append(det['confidence'])
        labels.append(det['class_id'])
    boxes = torch.tensor(boxes, dtype=torch.float32)
    scores = torch.tensor(scores, dtype=torch.float32)
    labels = torch.tensor(labels)

    keep_indices = []
    unique_labels = labels.unique()
    for label in unique_labels:
        idxs = (labels == label).nonzero(as_tuple=False).squeeze(1)
        class_boxes = boxes[idxs]
        class_scores = scores[idxs]
        keep = nms(class_boxes, class_scores, iou_threshold)
        keep_indices.extend(idxs[keep].tolist())

    return [detections[i] for i in keep_indices]

In [7]:
# Function to get image details by image_id
def get_image_id(coco_data, image_name):
    for image in coco_data["images"]:
        file_name = Path(image['file_name']).stem
        if file_name == image_name:
            return image['id']
    return None

In [None]:
#rt-detr
input_folder = 'testdev_subsets/medium/v1/images'
for filename in os.listdir(input_folder):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            image_path = os.path.join(input_folder, filename)
            #image_as_pil = read_image_as_pil(image_path)
            filename_without_ext = Path(filename).stem
            
            print("*****************************************")
            print("File Name", filename_without_ext)

            #img_id = get_image_id(data, filename_without_ext)
            #image_ids.append(image_id)

            # Get predictions from your detection model
            #time_start = time.time()
            prediction = get_prediction(image_path, detection_model, verbose=1) #changes
            #time_end = time.time() - time_start
            #print(f"Prediction Performed in {time_end} seconds")
            object_density = len(prediction.object_prediction_list)
            print("Object Density:", object_density)

In [22]:
#yolov8
input_folder = 'testdev_subsets/medium/v1/images'
for filename in os.listdir(input_folder):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            image_path = os.path.join(input_folder, filename)
            #image_as_pil = read_image_as_pil(image_path)
            filename_without_ext = Path(filename).stem
            
            print("*****************************************")
            print("File Name", filename_without_ext)

            #img_id = get_image_id(data, filename_without_ext)
            #image_ids.append(image_id)

            # Get predictions from your detection model
            #time_start = time.time()
            prediction = get_prediction(image_path, detection_model, verbose=1) #changes
            #time_end = time.time() - time_start
            #print(f"Prediction Performed in {time_end} seconds")
            
            object_density = len(prediction.object_prediction_list)
            print("Object Density:", object_density)

*****************************************
File Name 9999941_00000_d_0000004
Prediction performed in 0.015785694122314453 seconds.
Object Density: 28
*****************************************
File Name 9999938_00000_d_0000009
Prediction performed in 0.013902902603149414 seconds.
Object Density: 26
*****************************************
File Name 9999986_00000_d_0000012
Prediction performed in 0.08486056327819824 seconds.
Object Density: 19
*****************************************
File Name 0000063_06000_d_0000007
Prediction performed in 0.009827613830566406 seconds.
Object Density: 30
*****************************************
File Name 9999938_00000_d_0000299
Prediction performed in 0.013421773910522461 seconds.
Object Density: 30
*****************************************
File Name 9999938_00000_d_0000031
Prediction performed in 0.013299703598022461 seconds.
Object Density: 31
*****************************************
File Name 0000164_01068_d_0000162
Prediction performed in 0.00941

In [33]:
#yolov11
input_folder = 'testdev_subsets/medium/v1/images'
for filename in os.listdir(input_folder):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            image_path = os.path.join(input_folder, filename)
            #image_as_pil = read_image_as_pil(image_path)
            filename_without_ext = Path(filename).stem
            
            print("*****************************************")
            print("File Name", filename_without_ext)

            #img_id = get_image_id(data, filename_without_ext)
            #image_ids.append(image_id)

            # Get predictions from your detection model
            #time_start = time.time()
            prediction = get_prediction(image_path, detection_model, verbose=1) #changes
            #time_end = time.time() - time_start
            #print(f"Prediction Performed in {time_end} seconds")
            
            object_density = len(prediction.object_prediction_list)
            print("Object Density:", object_density)

*****************************************
File Name 9999941_00000_d_0000004
Prediction performed in 0.737584114074707 seconds.
Object Density: 27
*****************************************
File Name 9999938_00000_d_0000009
Prediction performed in 0.015390872955322266 seconds.
Object Density: 27
*****************************************
File Name 9999986_00000_d_0000012
Prediction performed in 0.056285858154296875 seconds.
Object Density: 25
*****************************************
File Name 0000063_06000_d_0000007
Prediction performed in 0.011892318725585938 seconds.
Object Density: 31
*****************************************
File Name 9999938_00000_d_0000299
Prediction performed in 0.014685869216918945 seconds.
Object Density: 26
*****************************************
File Name 9999938_00000_d_0000031
Prediction performed in 0.014840364456176758 seconds.
Object Density: 30
*****************************************
File Name 0000164_01068_d_0000162
Prediction performed in 0.0109727

In [40]:
#yolov8
input_folder = 'testdev_subsets/sparse/v1/images'
for filename in os.listdir(input_folder):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            image_path = os.path.join(input_folder, filename)
            #image_as_pil = read_image_as_pil(image_path)
            filename_without_ext = Path(filename).stem
            
            print("*****************************************")
            print("File Name", filename_without_ext)

            #img_id = get_image_id(data, filename_without_ext)
            #image_ids.append(image_id)

            # Get predictions from your detection model
            #time_start = time.time()
            prediction = get_prediction(image_path, detection_model, verbose=1) #changes
            #time_end = time.time() - time_start
            #print(f"Prediction Performed in {time_end} seconds")
            
            object_density = len(prediction.object_prediction_list)
            print("Object Density:", object_density)

*****************************************
File Name 9999973_00000_d_0000054
Prediction performed in 0.07944846153259277 seconds.
Object Density: 14
*****************************************
File Name 9999986_00000_d_0000027
Prediction performed in 0.0166776180267334 seconds.
Object Density: 5
*****************************************
File Name 9999973_00000_d_0000126
Prediction performed in 0.01986527442932129 seconds.
Object Density: 8
*****************************************
File Name 9999938_00000_d_0000476
Prediction performed in 0.013657331466674805 seconds.
Object Density: 17
*****************************************
File Name 0000078_05467_d_0000015
Prediction performed in 0.012717485427856445 seconds.
Object Density: 3
*****************************************
File Name 9999952_00000_d_0000178
Prediction performed in 0.013298749923706055 seconds.
Object Density: 9
*****************************************
File Name 9999938_00000_d_0000322
Prediction performed in 0.013139486312

In [44]:
import pandas as pd
from scipy.stats import pearsonr

# -------------------------------
# Configuration: file names and column names
# -------------------------------
predicted_file = 'predicted_outputs.xlsx'      # Excel file containing predicted counts
ground_truth_file = 'medium_gt.xlsx'  # Excel file containing ground truth counts
output_file = 'output_analysis.xlsx'            # Output Excel file name

# Define the column names. Adjust these names if your files have different headers.
id_column = 'file_name'              # Column to merge on, e.g., an image identifier
predicted_count_column = 'predict_bbox_count'
ground_truth_count_column = 'gt_bbox_count'

# -------------------------------
# Read the Excel files
# -------------------------------
predicted_df = pd.read_excel(predicted_file)
ground_truth_df = pd.read_excel(ground_truth_file)

# -------------------------------
# Merge the dataframes on the common identifier
# -------------------------------
merged_df = pd.merge(predicted_df, ground_truth_df, on=id_column, how='inner')
print()
# -------------------------------
# Calculate the difference in counts
# -------------------------------
# Here the difference is calculated as ground truth minus predicted count.
merged_df['Difference'] = abs(merged_df[ground_truth_count_column] - merged_df[predicted_count_column])

# -------------------------------
# Calculate the Pearson correlation coefficient
# -------------------------------
pearson_corr, p_value = pearsonr(merged_df[predicted_count_column], merged_df[ground_truth_count_column])
print(f"Pearson Correlation Coefficient: {pearson_corr}, P-value: {p_value}")

# Create a summary dataframe for correlation results
corr_df = pd.DataFrame({
    'Pearson_Correlation': [pearson_corr],
    'P_value': [p_value]
})

# -------------------------------
# Save the output to an Excel file with two sheets:
# - One sheet for the merged counts and differences.
# - One sheet for the correlation results.
# -------------------------------
with pd.ExcelWriter(output_file) as writer:
    merged_df.to_excel(writer, sheet_name='Counts_and_Differences', index=False)
    corr_df.to_excel(writer, sheet_name='Correlation', index=False)

print(f"Results successfully saved to {output_file}")



Pearson Correlation Coefficient: 0.09073230368456274, P-value: 0.5308941754499572
Results successfully saved to output_analysis.xlsx


In [41]:
import os
from pathlib import Path
import pandas as pd
from scipy.stats import pearsonr
import time

# -------------------------------
# Configuration: file paths and column names
# -------------------------------
input_folder = 'testdev_subsets/medium/v1/images'  # Folder with images
predicted_output_file = 'predicted_outputs.xlsx'      # Excel file with ground truth counts


# -------------------------------
# Step 1: Process images and obtain predictions
# -------------------------------
predicted_data = []

# Iterate through each file in the input folder
for filename in os.listdir(input_folder):
    # Check if the file is an image
    if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        image_path = os.path.join(input_folder, filename)
        filename_without_ext = Path(filename).stem
        
        print("*****************************************")
        print("File Name:", filename_without_ext)
        
        # Get predictions from your detection model.
        # It is assumed that 'get_prediction' and 'detection_model' are defined elsewhere.
        # Optionally, you can time this prediction step.
        # time_start = time.time()
        prediction = get_prediction(image_path, detection_model, verbose=1)
        # time_end = time.time() - time_start
        # print(f"Prediction performed in {time_end:.2f} seconds")
        
        # Calculate the object density (predicted count)
        object_density = len(prediction.object_prediction_list)
        print("Object Density:", object_density)
        
        # Store the result for this image
        predicted_data.append({
            'file_name': filename,
            'bbox_count': object_density
        })

# Convert the list of predicted counts to a DataFrame
predicted_df = pd.DataFrame(predicted_data)

# Save the merged DataFrame to an Excel file with one sheet
predicted_df.to_excel(predicted_output_file, sheet_name='Results', index=False)
print(f"Analysis results saved to {predicted_output_file}")


*****************************************
File Name: 9999941_00000_d_0000004
Prediction performed in 0.013935565948486328 seconds.
Object Density: 28
*****************************************
File Name: 9999938_00000_d_0000009
Prediction performed in 0.013425827026367188 seconds.
Object Density: 26
*****************************************
File Name: 9999986_00000_d_0000012
Prediction performed in 0.016100168228149414 seconds.
Object Density: 19
*****************************************
File Name: 0000063_06000_d_0000007
Prediction performed in 0.010104179382324219 seconds.
Object Density: 30
*****************************************
File Name: 9999938_00000_d_0000299
Prediction performed in 0.012789249420166016 seconds.
Object Density: 30
*****************************************
File Name: 9999938_00000_d_0000031
Prediction performed in 0.013325214385986328 seconds.
Object Density: 31
*****************************************
File Name: 0000164_01068_d_0000162
Prediction performed in

In [11]:
import os
from pathlib import Path
import pandas as pd
from scipy.stats import pearsonr
import time

# -------------------------------
# Configuration: file paths and column names
# -------------------------------
input_folder = 'testdev_subsets/dense/v1/images'  # Folder with images
predicted_output_file = 'predicted_outputs_dense.xlsx'      # Excel file with ground truth counts


# -------------------------------
# Step 1: Process images and obtain predictions
# -------------------------------
predicted_data = []

# Iterate through each file in the input folder
for filename in os.listdir(input_folder):
    # Check if the file is an image
    if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        image_path = os.path.join(input_folder, filename)
        filename_without_ext = Path(filename).stem
        
        print("*****************************************")
        print("File Name:", filename_without_ext)
        
        # Get predictions from your detection model.
        # It is assumed that 'get_prediction' and 'detection_model' are defined elsewhere.
        # Optionally, you can time this prediction step.
        # time_start = time.time()
        prediction = get_prediction(image_path, detection_model, verbose=1)
        # time_end = time.time() - time_start
        # print(f"Prediction performed in {time_end:.2f} seconds")
        
        # Calculate the object density (predicted count)
        object_density = len(prediction.object_prediction_list)
        print("Object Density:", object_density)
        
        # Store the result for this image
        predicted_data.append({
            'file_name': filename,
            'bbox_count': object_density
        })

# Convert the list of predicted counts to a DataFrame
predicted_df = pd.DataFrame(predicted_data)

# Save the merged DataFrame to an Excel file with one sheet
predicted_df.to_excel(predicted_output_file, sheet_name='Results', index=False)
print(f"Analysis results saved to {predicted_output_file}")


*****************************************
File Name: 9999979_00000_d_0000047
Prediction performed in 0.21282553672790527 seconds.
Object Density: 72
*****************************************
File Name: 9999947_00000_d_0000022
Prediction performed in 0.016318798065185547 seconds.
Object Density: 27
*****************************************
File Name: 9999973_00000_d_0000012
Prediction performed in 0.08768987655639648 seconds.
Object Density: 34
*****************************************
File Name: 9999979_00000_d_0000043
Prediction performed in 0.016655683517456055 seconds.
Object Density: 62
*****************************************
File Name: 9999938_00000_d_0000189
Prediction performed in 0.014228343963623047 seconds.
Object Density: 23
*****************************************
File Name: 0000074_03738_d_0000007
Prediction performed in 0.019930124282836914 seconds.
Object Density: 52
*****************************************
File Name: 9999952_00000_d_0000131
Prediction performed in 0

In [12]:
prediction.object_prediction_list

[ObjectPrediction<
     bbox: BoundingBox: <(867.269775390625, 310.645263671875, 897.5737915039062, 340.4339294433594), w: 30.30401611328125, h: 29.788665771484375>,
     mask: None,
     score: PredictionScore: <value: 0.868489682674408>,
     category: Category: <id: 3, name: car>>,
 ObjectPrediction<
     bbox: BoundingBox: <(572.3723754882812, 285.0955810546875, 697.0071411132812, 377.30517578125), w: 124.634765625, h: 92.2095947265625>,
     mask: None,
     score: PredictionScore: <value: 0.8607072830200195>,
     category: Category: <id: 8, name: bus>>,
 ObjectPrediction<
     bbox: BoundingBox: <(705.3282470703125, 451.6910400390625, 748.6197509765625, 492.05279541015625), w: 43.29150390625, h: 40.36175537109375>,
     mask: None,
     score: PredictionScore: <value: 0.843823254108429>,
     category: Category: <id: 3, name: car>>,
 ObjectPrediction<
     bbox: BoundingBox: <(700.122802734375, 547.0645751953125, 794.6766357421875, 687.2840576171875), w: 94.5538330078125, h: 140