In [5]:
import numpy as np
import cv2
from ultralytics import YOLO # Assuming this is your YOLOv10 implementation
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # To ensure GPU usage

# Step 1: Slice Image
def slice_image(image, slice_size, overlap):
    """
    Divide the image into overlapping slices.

    Args:
        image (numpy.ndarray): Input image.
        slice_size (int): Size of each slice (width and height).
        overlap (float): Overlap ratio between slices (0 to 1).

    Returns:
        slices (list): List of image slices.
        coordinates (list): List of top-left coordinates of each slice.
    """
    h, w, _ = image.shape
    step = int(slice_size * (1 - overlap))
    slices = []
    coordinates = []

    for y in range(0, h, step):
        for x in range(0, w, step):
            slice_img = image[y:y + slice_size, x:x + slice_size]
            slices.append(slice_img)
            coordinates.append((x, y))

    return slices, coordinates

# Step 2: Infer Slices
def resize_to_nearest_compatible_size(image, target_size=480):
    """
    Resize the image to the nearest size divisible by 32.
    
    Args:
        image (numpy.ndarray): Input image to be resized.
        target_size (int): Target size for resizing (e.g., 640).
    
    Returns:
        numpy.ndarray: Resized image with dimensions divisible by 32.
    """
    height, width = image.shape[:2]
    new_width = (width // 32) * 32
    new_height = (height // 32) * 32
    
    if new_width < target_size:
        new_width += 32
    if new_height < target_size:
        new_height += 32
    
    return cv2.resize(image, (new_width, new_height))

def infer_slices(model, slices):
    detections = []
    for slice_img in slices:
        # Resize the slice to the nearest compatible size (divisible by 32)
        resized_slice = resize_to_nearest_compatible_size(slice_img, target_size=640)
        
        # Convert image to tensor and move to GPU
        tensor_img = torch.from_numpy(resized_slice).permute(2, 0, 1).unsqueeze(0).float().cuda()
        
        # Run inference
        results = model.predict(tensor_img)  # Perform inference
        
        # Extract bounding boxes from the results object
        boxes = results[0].boxes  # This is where the boxes are stored
        
        # Get the boxes in [x1, y1, x2, y2, confidence, class_id] format
        slice_detections = []
        for box in boxes:
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()  # Convert to numpy array
            confidence = box.conf[0].cpu().numpy()  # Confidence score
            class_id = int(box.cls[0].cpu().numpy())  # Class ID
            
            slice_detections.append([x1, y1, x2, y2, confidence, class_id])
        
        detections.append(slice_detections)
    
    return detections



# Step 3: Aggregate Results
def aggregate_results(detections, coordinates, slice_size, image_shape, iou_threshold):
    all_detections = []

    for det, coord in zip(detections, coordinates):
        x_offset, y_offset = coord
        for bbox in det:
            x1, y1, x2, y2, score, class_id = bbox
            # Map detections back to the original image coordinates
            x1 += x_offset
            y1 += y_offset
            x2 += x_offset
            y2 += y_offset
            all_detections.append([x1, y1, x2, y2, score, class_id])

    # Convert to numpy array
    all_detections = np.array(all_detections)

    # Apply Non-Max Suppression
    final_detections = non_max_suppression(all_detections, iou_threshold)

    return final_detections


# Utility: Non-Max Suppression
def non_max_suppression(detections, iou_threshold):
    """
    Apply Non-Max Suppression (NMS) to filter overlapping boxes.

    Args:
        detections (numpy.ndarray): Array of detections.
        iou_threshold (float): IOU threshold for NMS.

    Returns:
        numpy.ndarray: Filtered detections after NMS.
    """
    if len(detections) == 0:
        return detections

    x1 = detections[:, 0]
    y1 = detections[:, 1]
    x2 = detections[:, 2]
    y2 = detections[:, 3]
    scores = detections[:, 4]

    indices = scores.argsort()[::-1]
    keep = []

    while len(indices) > 0:
        current = indices[0]
        keep.append(current)
        rest = indices[1:]

        # Compute IOU
        xx1 = np.maximum(x1[current], x1[rest])
        yy1 = np.maximum(y1[current], y1[rest])
        xx2 = np.minimum(x2[current], x2[rest])
        yy2 = np.minimum(y2[current], y2[rest])

        inter_area = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        box_area = (x2 - x1) * (y2 - y1)
        union_area = box_area[current] + box_area[rest] - inter_area

        iou = inter_area / (union_area + 1e-6)
        indices = rest[iou < iou_threshold]

    return detections[keep]

def annotate_image(image, detections):
    """
    Annotates the image with bounding boxes.
    
    Args:
        image (numpy.ndarray): The original image.
        detections (list): List of detections with format [x1, y1, x2, y2, confidence, class_id].
        
    Returns:
        numpy.ndarray: The annotated image with bounding boxes.
    """
    for detection in detections:
        x1, y1, x2, y2, score, class_id = detection
        
        # Draw rectangle
        color = (0, 255, 0)  # Green color for bounding box (you can choose different colors)
        thickness = 2
        image = cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), color, thickness)
        
        # Draw label (class name + confidence)
        # make the label font blue and thinner
        label = f"{model.names[class_id]} {score:.2f}"
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.4
        font_thickness = 1
        label_size = cv2.getTextSize(label, font, font_scale, font_thickness)[0]
        label_x = int(x1)
        label_y = int(y1) - 10
        image = cv2.putText(image, label, (label_x, label_y), font, font_scale, color, font_thickness)
    
    return image


# Example Usage
if __name__ == "__main__":
    img_path = r"C:\Users\Abhim\OneDrive\Pictures\Screenshots\test_img.png"
    image = cv2.imread(img_path)
    slice_size = 480
    overlap = 0
    iou_threshold = 0.5

    # Initialize YOLOv10 model
    path = r"C:\Users\Abhim\Downloads\yolov10l_81eps_960.pt"
    # path = r"C:\Users\Abhim\Downloads\yolov10l_81eps_960_quantized_int8.pt"
    
    model = YOLO(path).cuda()  # Ensure the model is on GPU

    #resize the image so that the width is 960 and the height is scaled accordingly
    image = cv2.resize(image, (960, int(960 * image.shape[0] / image.shape[1])))

    # Slice the image
    slices, coordinates = slice_image(image, slice_size, overlap)

    # Perform inference on slices
    detections = infer_slices(model, slices)

    # Aggregate results
    final_detections = aggregate_results(detections, coordinates, slice_size, image.shape, iou_threshold)
    annotated_image = annotate_image(image.copy(), final_detections)

    # Display the annotated image
    cv2.imshow("Annotated Image", annotated_image)
    cv2.waitKey(0)  # Wait for a key press to close the window
    cv2.destroyAllWindows()

    # Optionally, save the annotated image to a file
    cv2.imwrite("annotated_image.png", annotated_image)

    print("Final Detections:", final_detections)



0: 512x512 14 cars, 2 longvehicles, 22.0ms
Speed: 0.0ms preprocess, 22.0ms inference, 1.0ms postprocess per image at shape (1, 3, 512, 512)

0: 512x512 6 persons, 23 cars, 3 longvehicles, 23.0ms
Speed: 0.0ms preprocess, 23.0ms inference, 1.0ms postprocess per image at shape (1, 3, 512, 512)

0: 96x512 3 cars, 21.5ms
Speed: 0.0ms preprocess, 21.5ms inference, 1.0ms postprocess per image at shape (1, 3, 96, 512)

0: 96x512 (no detections), 25.0ms
Speed: 0.0ms preprocess, 25.0ms inference, 0.0ms postprocess per image at shape (1, 3, 96, 512)
Final Detections: [[     398.22      440.03      498.42      493.44     0.93618           1]
 [     363.93      307.44      445.18      343.35     0.92784           1]
 [     496.71      289.54      535.51      332.68     0.92779           1]
 [     211.46         480      324.98      539.14     0.92773           1]
 [     193.24      399.57      289.22      451.55     0.91905           1]
 [      223.5      430.58      326.46      474.85     0.91734