In [6]:
import cv2
import torch
import matplotlib.pyplot as plt
import numpy as np
import sys
sys.path.append('../yolov5')  # Update to your YOLOv5 path

from yolov5.models.common import DetectMultiBackend
from yolov5.utils.general import non_max_suppression
from yolov5.utils.torch_utils import select_device
%matplotlib inline



def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    """
    Rescales bounding box coordinates from one image shape to another.

    Args:
        img1_shape (tuple): Shape of the resized image used for inference (height, width).
        coords (torch.Tensor): Bounding box coordinates as (x1, y1, x2, y2).
        img0_shape (tuple): Original image shape (height, width).
        ratio_pad (tuple, optional): Pre-computed gain and padding values.

    Returns:
        torch.Tensor: Scaled bounding box coordinates.
    """
    if ratio_pad is None:  # Calculate from shapes
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = resized / original
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # width, height
    else:
        gain, pad = ratio_pad

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    coords[:, :4].clamp_(min=0, max=max(img0_shape))  # Ensure coordinates stay within image bounds
    return coords




def resize_and_crop(image, target_size, alpha=1.0):
    orig_h, orig_w = image.shape[:2]
    target_w, target_h = target_size

    # Scaling
    scale = max(target_w / (orig_w * alpha), target_h / (orig_h * alpha))
    new_w, new_h = int(orig_w * scale), int(orig_h * scale)

    # Resizing
    resized_image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # Center cropping
    x1 = (new_w - target_w) // 2
    y1 = (new_h - target_h) // 2
    cropped_image = resized_image[y1:y1 + target_h, x1:x1 + target_w]

    return cropped_image

# Set up device
device = select_device('')  # Automatically selects CUDA if available, otherwise CPU

# Load the trained model
model = DetectMultiBackend(
    '/Users/martindrieux/Documents/GitHub/INF573_Puzzle-1/yolov5/runs/train/exp2/weights/best.pt',
    device=device,
    dnn=False
)
print("Model loaded successfully!")

# Load and preprocess the image
image_path = '../nos_puzzles/yakari_pieces1.png'
yakari_pieces = cv2.imread(image_path)
yakari_pieces = cv2.cvtColor(yakari_pieces, cv2.COLOR_BGR2RGB)

target_size = (640, 640)
img_resized = resize_and_crop(yakari_pieces, target_size, alpha=0.9)

# Normalize and convert to PyTorch tensor
img_tensor = torch.from_numpy(img_resized).permute(2, 0, 1).unsqueeze(0).float().div(255.0).to(device)

# Run inference
predictions = model(img_tensor)

# Apply Non-Max Suppression (NMS)
results = non_max_suppression(predictions, conf_thres=0.05, iou_thres=0.45)

# Annotate and display results
annotated_image = img_resized.copy()
for det in results:
    if det is not None and len(det):
        # Scale coordinates back to the original resized image
        det[:, :4] = scale_coords(img_tensor.shape[2:], det[:, :4], annotated_image.shape).round()

        for *xyxy, conf, cls in det:
            # Draw bounding boxes and labels
            label = f'{model.names[int(cls)]} {conf:.2f}'
            x_min, y_min, x_max, y_max = map(int, xyxy)
            cv2.rectangle(annotated_image, (x_min, y_min), (x_max, y_max), color=(0, 255, 0), thickness=2)
            cv2.putText(
                annotated_image, label, (x_min, y_min - 10),
                cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.5, color=(0, 255, 0), thickness=2
            )

# Display the annotated image
plt.figure(figsize=(10, 10))
plt.imshow(annotated_image)
plt.axis('off')
plt.show()




ModuleNotFoundError: No module named 'yolov5'

In [None]:
def apply_model(img_resized, conf_t, iou_t):
    img_tensor = torch.from_numpy(img_resized).permute(2, 0, 1).unsqueeze(0).float().div(255.0).to(device)
    predictions = model(img_tensor)
    results = non_max_suppression(predictions, conf_thres=conf_t, iou_thres=iou_t)
    return predictions, results

def draw_boxes(img, results):
    annotated_image = img.copy()
    for det in results:
        if det is not None and len(det):
            det[:, :4] = scale_coords(img_tensor.shape[2:], det[:, :4], annotated_image.shape).round()
            for *xyxy, conf, cls in det:
                if len(xyxy) != 4:
                    print("Invalid detection skipped:", xyxy)  # Debugging output
                    continue  # Skip invalid detections
                
                label = f'{model.names[int(cls)]} {conf:.2f}'
                x_min, y_min, x_max, y_max = map(int, xyxy)
                cv2.rectangle(annotated_image, (x_min, y_min), (x_max, y_max), color=(0, 255, 0), thickness=2)
                cv2.putText(
                    annotated_image, label, (x_min, y_min - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.5, color=(0, 255, 0), thickness=2
                )
    return annotated_image


pred, res = apply_model(img_resized, 0.05, 0.45)
draw_boxes(img_resized, res)
plt.imshow(draw_boxes(img_resized, res))

<matplotlib.image.AxesImage at 0x3002d7a70>

In [None]:
def detect_and_aggregate(image, overlap=0.2, conf_thres=0.25, iou_thres=0.45, alpha=1.0):
    """
    Detect bounding boxes from overlapping sub-images and aggregate them into the original image.

    Args:
        image (numpy.ndarray): Input image in RGB format.
        overlap (float): Fractional overlap between sub-images.
        conf_thres (float): Confidence threshold for detection.
        iou_thres (float): IoU threshold for Non-Max Suppression.
        alpha (float): Scaling factor for resizing sub-images.
                      For alpha=1, sub-image is a square of the smallest dimension of the image.

    Returns:
        list: Aggregated bounding boxes in the format [x_min, y_min, x_max, y_max, confidence].
        numpy.ndarray: Annotated image with bounding boxes drawn.
    """
    orig_h, orig_w = image.shape[:2]
    smallest_dim = min(orig_h, orig_w)  # Use the smallest dimension as the base size
    sub_size = int(smallest_dim * alpha)  # Adjust sub-image size based on alpha

    # Ensure sub-size is valid
    min_sub_size = 32  # Minimum acceptable size
    sub_size = max(sub_size, min_sub_size)

    step = int(sub_size * (1 - overlap))  # Calculate the step size based on overlap
    step = max(step, 1)  # Ensure step size is at least 1 pixel

    all_boxes = []

    # Loop through sub-images
    for y in range(0, orig_h, step):
        for x in range(0, orig_w, step):
            # Adjust sub-image coordinates to stay within the image boundaries
            x_end = min(x + sub_size, orig_w)
            y_end = min(y + sub_size, orig_h)

            # Adjust overlap for sub-images near the edges
            if x + sub_size > orig_w:
                overlap_x = (x + sub_size - orig_w) / sub_size
                x = int(x - sub_size * overlap_x)  # Adjust x to increase overlap
            if y + sub_size > orig_h:
                overlap_y = (y + sub_size - orig_h) / sub_size
                y = int(y - sub_size * overlap_y)  # Adjust y to increase overlap

            # Define the sub-image
            sub_image = image[y:y_end, x:x_end]

            # Resize sub-image to match the required detection size
            sub_image_resized = cv2.resize(sub_image, (sub_size, sub_size))

            # Detect objects in the sub-image
            _, results = apply_model(sub_image_resized, conf_thres, iou_thres)

            # Adjust bounding boxes to original image coordinates
            for det in results:
                if det is not None and len(det):
                    det[:, :4] = scale_coords((sub_size, sub_size), det[:, :4], sub_image_resized.shape).round()
                    for *xyxy, conf, cls in det:
                        x_min, y_min, x_max, y_max = map(int, xyxy)
                        x_min += x
                        x_max += x
                        y_min += y
                        y_max += y
                        all_boxes.append([x_min, y_min, x_max, y_max, conf.item()])

    # Aggregate bounding boxes using Non-Max Suppression
    if len(all_boxes) > 0:
        boxes = torch.tensor(all_boxes)[:, :4]
        scores = torch.tensor(all_boxes)[:, 4]
        nms_indices = torch.ops.torchvision.nms(boxes, scores, iou_thres)
        all_boxes = np.array(all_boxes)[nms_indices.numpy()]

    # Annotate the original image
    annotated_image = image.copy()
    for x_min, y_min, x_max, y_max, conf in all_boxes:
        x_min, y_min, x_max, y_max = map(int, [x_min, y_min, x_max, y_max])
        label = f'Conf: {conf:.2f}'
        cv2.rectangle(annotated_image, (x_min, y_min), (x_max, y_max), color=(0, 255, 0), thickness=2)
        cv2.putText(
            annotated_image, label, (x_min, y_min - 10),
            cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.5, color=(0, 255, 0), thickness=2
        )

    return all_boxes, annotated_image


In [None]:
# Load the image
original_image = yakari_pieces
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

# Test with small alpha values


bounding_boxes, annotated_image = detect_and_aggregate(
    original_image, overlap=0.2, conf_thres=0.05, iou_thres=0.45, alpha=1
)
plt.figure(figsize=(10, 10))
plt.imshow(annotated_image)
plt.axis('off')
plt.show()


In [None]:
import matplotlib.patches as patches
import matplotlib.pyplot as plt
from matplotlib.colors import to_rgba
import numpy as np

def visualize_sub_images(image, overlap=0.2, alpha=1.0):
    """
    Visualize sub-images by overlaying semi-transparent, colored boxes on the original image,
    accounting for a zooming parameter (alpha).

    Args:
        image (numpy.ndarray): Input image in RGB format.
        overlap (float): Fractional overlap between sub-images.
        alpha (float): Scaling parameter for sub-image size.
                      For alpha=1, sub-image is a square of the smallest dimension of the image.

    Returns:
        None: Displays the visualization.
    """
    orig_h, orig_w = image.shape[:2]
    smallest_dim = min(orig_h, orig_w)  # Use the smallest dimension as the base size
    sub_size = int(smallest_dim * alpha)  # Adjust sub-image size based on alpha

    # Ensure sub-size doesn't exceed the original dimensions
    sub_size = min(sub_size, orig_h, orig_w)

    step = int(sub_size * (1 - overlap))  # Calculate the step size based on overlap

    # Define a set of colors
    colors = [
        "red", "blue", "green", "orange", "purple", "yellow", "cyan", "magenta"
    ]
    color_count = len(colors)

    # Plot the original image
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(image)
    ax.axis('off')

    # Loop through sub-images and draw boxes
    for idx_y, y in enumerate(range(0, orig_h, step)):
        for idx_x, x in enumerate(range(0, orig_w, step)):
            # Define the sub-image coordinates
            x_end = min(x + sub_size, orig_w)
            y_end = min(y + sub_size, orig_h)
            width = x_end - x
            height = y_end - y

            # Choose a color cyclically from the list
            color = colors[(idx_y * len(range(0, orig_w, step)) + idx_x) % color_count]

            # Create a semi-transparent rectangle
            rect = patches.Rectangle(
                (x, y), width, height,
                linewidth=1, edgecolor=color,
                facecolor=to_rgba(color, 0.3)  # Semi-transparent fill
            )
            ax.add_patch(rect)

    plt.show()


In [None]:
# Load your image
original_image = yakari_pieces
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

# Visualize sub-images with zooming (alpha)
visualize_sub_images(original_image, overlap=0.2, alpha=0.5)  # Largest sub-image size


