In [1]:
# Import required functions and classes
from sahi import AutoDetectionModel
from sahi.utils.cv import read_image, read_image_as_pil
from sahi.utils.file import Path, increment_path, list_files, save_json, save_pickle, download_from_url
from sahi.predict import get_prediction, get_sliced_prediction, predict, agg_prediction, get_prediction_batched, get_sliced_prediction_batched, get_batch_predict
#from sahi.predict_batch import get_prediction, get_sliced_prediction, predict, agg_prediction
from sahi.prediction import visualize_object_predictions
from IPython.display import Image
from numpy import asarray
import cv2
import os
import numpy as np
from tqdm import tqdm
import time

In [2]:
# Checkpoint locations, relative to the notebook's working directory.
# This cell only records the paths; it does not download anything.
yolov8_model_path = "models/yolov8/last.pt"
yolov11_model_path = "models/yolo11/last.pt"

In [3]:
# Load the checkpoint at `yolov8_model_path` through sahi's AutoDetectionModel
# so it can be passed to the sahi prediction helpers used below.
detection_model = AutoDetectionModel.from_pretrained(
    model_path=yolov8_model_path,
    model_type="ultralytics",
    device="cuda:0",  # or "cpu"
    confidence_threshold=0.3,
)

In [4]:
import cv2
from PIL import Image

def get_slice_parameters(object_density):
    """Pick slice size and overlap ratios from an object-density value.

    Tiers (first match wins, densest first):

    * ``object_density >= 50``  -> 512 px slices, 0.5 overlap
    * ``25 <= object_density``  -> 512 px slices, 0.25 overlap
    * ``10 <= object_density``  -> 512 px slices, 0.15 overlap
    * anything else             -> 1024 px slices, 0.15 overlap

    Args:
        object_density: Number compared against the tier thresholds. In this
            notebook it is the count of objects found by a first prediction
            pass over the image.

    Returns:
        Tuple ``(slice_width, slice_height, overlap_width_ratio,
        overlap_height_ratio)``. Slices are always square and both overlap
        ratios are always equal.
    """
    # (minimum density, slice side in pixels, overlap ratio), densest tier first.
    density_tiers = (
        (50, 512, 0.5),
        (25, 512, 0.25),
        (10, 512, 0.15),
    )

    # Fallback used when no tier threshold is reached.
    slice_side, overlap_ratio = 1024, 0.15
    for min_density, tier_side, tier_overlap in density_tiers:
        if object_density >= min_density:
            slice_side, overlap_ratio = tier_side, tier_overlap
            break

    return slice_side, slice_side, overlap_ratio, overlap_ratio

In [9]:
pwd

'/mmfs1/scratch/dsu.local/bshakya/scratch/bshakya/sahi'

In [12]:
# Baseline: time one unsliced get_prediction() call per image in the folder and
# report the number of returned detections as that image's "object density".
input_folder = 'single_test/images'

# os.listdir() returns entries in arbitrary order; sort so the log is reproducible.
for filename in sorted(os.listdir(input_folder)):
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue  # not an image file

    image_path = os.path.join(input_folder, filename)
    filename_without_ext = Path(filename).stem

    print("*****************************************")
    print("Image Path: ", image_path)
    print("File Name", filename_without_ext)

    # perf_counter() is a monotonic, high-resolution clock, which suits
    # intervals of a few milliseconds better than time.time().
    # NOTE(review): a path (not a loaded image) is passed in, so the timed span
    # presumably includes reading the file from disk - confirm.
    time_start = time.perf_counter()
    prediction = get_prediction(image_path, detection_model, verbose=1)
    time_end = time.perf_counter() - time_start  # elapsed seconds
    print("Initial Prediction time is: {:.2f} ms".format(time_end * 1000))

    object_density = len(prediction.object_prediction_list)
    print("Object Density:", object_density)

*****************************************
Image Path:  single_test/images/0000078_03171_d_0000009.jpg
File Name 0000078_03171_d_0000009
Prediction performed in 0.013099908828735352 seconds.
Initial Prediction time is: 23.55 ms
Object Density: 55


In [13]:
# Same measurement as the plain get_prediction() baseline, but going through
# get_prediction_batched() so the two code paths can be compared per image.
input_folder = 'single_test/images'

# os.listdir() returns entries in arbitrary order; sort so the log is reproducible.
for filename in sorted(os.listdir(input_folder)):
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue  # not an image file

    image_path = os.path.join(input_folder, filename)
    filename_without_ext = Path(filename).stem

    print("*****************************************")
    print("Image Path: ", image_path)
    print("File Name", filename_without_ext)

    # perf_counter() is a monotonic, high-resolution clock, which suits
    # intervals of a few milliseconds better than time.time().
    time_start = time.perf_counter()
    prediction = get_prediction_batched(image_path, detection_model, verbose=1)
    time_end = time.perf_counter() - time_start  # elapsed seconds
    print("Initial Prediction time is: {:.2f} ms".format(time_end * 1000))

    object_density = len(prediction.object_prediction_list)
    print("Object Density:", object_density)

*****************************************
Image Path:  single_test/images/0000078_03171_d_0000009.jpg
File Name 0000078_03171_d_0000009
Image Type 1 
Length of SHIFT AMOUNT:  [[0, 0]]
SHIFT AMOUNT:  [[0, 0]]
FULL SHAPE:  [None]
Prediction performed in 0.013222932815551758 seconds.
Initial Prediction time is: 23.80 ms
Object Density: 55


In [47]:
# Time get_prediction() with an explicit shift amount and full image shape, and
# report the number of returned detections as the image's "object density".
input_folder = 'single_test/images_bk'

# os.listdir() returns entries in arbitrary order; sort so the log is reproducible.
for filename in sorted(os.listdir(input_folder)):
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue  # not an image file

    image_path = os.path.join(input_folder, filename)
    filename_without_ext = Path(filename).stem

    # Derive the full image shape here so the cell does not depend on
    # image_height / image_width being left in the kernel by another cell
    # (on a fresh kernel that raised NameError). PIL's .size is (width, height).
    # Done before the timer starts so it does not affect the measurement.
    image_width, image_height = read_image_as_pil(image_path).size

    print("*****************************************")
    print("Image Path: ", image_path)
    print("File Name", filename_without_ext)

    time_start = time.perf_counter()
    prediction = get_prediction(
        image_path,
        detection_model,
        shift_amount=[250, 250],
        full_shape=[image_height, image_width],
        verbose=1,
    )
    time_end = time.perf_counter() - time_start  # elapsed seconds
    print("Initial Prediction time is: {:.2f} ms".format(time_end * 1000))

    object_density = len(prediction.object_prediction_list)
    print("Object Density:", object_density)

*****************************************
Image Path:  single_test/images_bk/0000006_06773_d_0000018.jpg
File Name 0000006_06773_d_0000018
Prediction performed in 0.013345479965209961 seconds.
Initial Prediction time is: 24.49 ms
Object Density: 37


In [31]:
# Time get_prediction_batched() with an explicit shift amount and full image
# shape, and report the number of returned detections as the "object density".
input_folder = 'single_test/images_bk'

# os.listdir() returns entries in arbitrary order; sort so the log is reproducible.
for filename in sorted(os.listdir(input_folder)):
    if not filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue  # not an image file

    image_path = os.path.join(input_folder, filename)
    image_as_pil = read_image_as_pil(image_path)
    filename_without_ext = Path(filename).stem
    image_width, image_height = image_as_pil.size  # PIL's .size is (width, height)

    print("*****************************************")
    print("image_width, image_height", image_width, image_height)
    print("Image Path: ", image_path)
    print("File Name", filename_without_ext)

    # NOTE(review): with the default arguments the recorded log shows
    # "SHIFT AMOUNT: [[0, 0]]" and a one-entry FULL SHAPE, whereas this call logs
    # a flat [250, 250] and FULL SHAPE duplicated into two entries. That suggests
    # get_prediction_batched may expect one shift per image, i.e. [[250, 250]] -
    # confirm against its signature before relying on these results.
    time_start = time.perf_counter()
    prediction = get_prediction_batched(
        image_path,
        detection_model,
        shift_amount_list=[250, 250],
        full_shape=[image_height, image_width],
        verbose=1,
    )
    time_end = time.perf_counter() - time_start  # elapsed seconds
    print("Initial Prediction time is: {:.2f} ms".format(time_end * 1000))

    object_density = len(prediction.object_prediction_list)
    print("Object Density:", object_density)

*****************************************
image_width, image_height 1360 765
Image Path:  single_test/images_bk/0000006_06773_d_0000018.jpg
File Name 0000006_06773_d_0000018
Image Type 1 
Length of SHIFT AMOUNT:  [250, 250]
SHIFT AMOUNT:  [250, 250]
FULL SHAPE:  [[765, 1360], [765, 1360]]
Prediction performed in 0.012885332107543945 seconds.
Initial Prediction time is: 23.74 ms
Object Density: 37


In [33]:
# Install ultralytics into the kernel's environment (no version is pinned here),
# then import it and print its environment summary.
# Keep comments off the %pip line itself: text after a line magic is passed to it.
%pip install ultralytics
import ultralytics
ultralytics.checks()

Ultralytics 8.3.91 🚀 Python-3.9.18 torch-2.1.2+cu118 CUDA:0 (NVIDIA A100 80GB PCIe, 81229MiB)
Setup complete ✅ (48 CPUs, 503.3 GB RAM, 26.7/430.3 GB disk)


In [45]:
from ultralytics import YOLO

# Load the same YOLOv8 checkpoint that the sahi detection model uses, but through
# the plain ultralytics API, so raw YOLO timing can be compared with sahi's.
model = YOLO(yolov8_model_path)

# Time a single predict() call on one image (inference only; nothing is trained).
# NOTE(review): in the recorded run the wall time is far larger than the per-image
# speed ultralytics reports, presumably first-call setup - consider a warm-up
# call before timing.
time_start = time.perf_counter()
results = model.predict('single_test/images/0000078_03171_d_0000009.jpg')
time_end = time.perf_counter() - time_start  # elapsed seconds
print("YOLO Prediction time is: {:.2f} ms".format(time_end * 1000))


image 1/1 /mmfs1/scratch/dsu.local/bshakya/scratch/bshakya/sahi/single_test/images/0000078_03171_d_0000009.jpg: 384x640 9 pedestrians, 1 car, 51 trucks, 1 tricycle, 4.7ms
Speed: 1.3ms preprocess, 4.7ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)
YOLO Prediction time is: 76.68 ms


In [39]:
# Get the bounding boxes and classes
bounding_boxes = results[0].boxes  # [x, y, width, height] for each detected object
object_count = len(bounding_boxes)  # Number of detected objects

In [40]:
# Display the number of detections counted from `results[0].boxes`.
object_count

62

In [43]:
# Time one predict() call over every image in the folder, requesting batch=4.
# perf_counter() is a monotonic, high-resolution clock and matches the timing
# used for the single-image YOLO measurement.
# Note: this rebinds `results` to the predictions for the whole folder.
time_start = time.perf_counter()
results = model.predict(source='test_vis_data/images', batch=4)
time_end = time.perf_counter() - time_start  # elapsed seconds
print("YOLO Prediction time is: {:.2f} ms".format(time_end * 1000))


image 1/15 /mmfs1/scratch/dsu.local/bshakya/scratch/bshakya/sahi/test_vis_data/images/0000011_04202_d_0000007.jpg: 640x640 7 pedestrians, 3 peoples, 1 bicycle, 2 cars, 6 trucks, 2 tricycles, 3 awning-tricycles, 2 buss, 1 motor, 60.5ms
image 2/15 /mmfs1/scratch/dsu.local/bshakya/scratch/bshakya/sahi/test_vis_data/images/0000054_00786_d_0000001.jpg: 640x640 80 pedestrians, 1 car, 3 motors, 60.5ms
image 3/15 /mmfs1/scratch/dsu.local/bshakya/scratch/bshakya/sahi/test_vis_data/images/0000074_07850_d_0000015.jpg: 640x640 57 pedestrians, 2 peoples, 3 bicycles, 4 cars, 1 motor, 60.5ms
image 4/15 /mmfs1/scratch/dsu.local/bshakya/scratch/bshakya/sahi/test_vis_data/images/0000074_08202_d_0000016.jpg: 640x640 70 pedestrians, 1 people, 1 tricycle, 60.5ms
image 5/15 /mmfs1/scratch/dsu.local/bshakya/scratch/bshakya/sahi/test_vis_data/images/0000078_01314_d_0000004.jpg: 640x640 (no detections), 1.1ms
image 6/15 /mmfs1/scratch/dsu.local/bshakya/scratch/bshakya/sahi/test_vis_data/images/0000078_06777_d