# Prepare Data

In [None]:
# Mount Google Drive at /content/drive inside the Colab runtime.
# NOTE(review): only works when the notebook runs on Google Colab, where the
# google.colab package is available.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Shell escape: extract the Buildings_v1 archive from the mounted Drive into the
# current working directory.
# NOTE(review): presumably this provides the train/valid/test folders read by the
# following cells — confirm against the archive contents.
!unzip drive/MyDrive/Data/Buildings_v1.zip

In [None]:
# Shell escape: extract the vpair_sample archive from the mounted Drive into the
# current working directory.
!unzip drive/MyDrive/Data/vpair_sample.zip

In [None]:
import os

# Total number of directory entries in the images folder of every dataset split.
sum(len(os.listdir(f"{split}/images")) for split in ("train", "valid", "test"))

In [None]:
import os
from collections import defaultdict

# Number of images per city, accumulated over all dataset splits.
# defaultdict(int) starts every unseen city at 0.
cities = defaultdict(int)

for split in ("train", "valid", "test"):
    for image_name in os.listdir(f"{split}/images"):
        # The city key is the text before the first "_" and, within that,
        # before the first "-".
        # NOTE(review): assumes file names start with the city name — confirm
        # against the dataset's naming scheme.
        city = image_name.split("_")[0].split("-")[0]
        cities[city] += 1

In [None]:
# Bare expression so the notebook displays the per-city image counts.
cities

# Import Libraries

In [None]:
import os
import random
from pprint import pprint
from IPython.display import Image, display

from ultralytics import YOLO, settings


def get_random_file(folder_path):
    """Return the path of one randomly chosen regular file directly inside ``folder_path``.

    Sub-directories are ignored (the search is not recursive).

    Raises:
        FileNotFoundError: if ``folder_path`` contains no regular files.
    """
    def _is_regular_file(entry_name):
        return os.path.isfile(os.path.join(folder_path, entry_name))

    candidates = list(filter(_is_regular_file, os.listdir(folder_path)))

    if len(candidates) == 0:
        raise FileNotFoundError("No files found in the specified directory.")

    chosen_name = random.choice(candidates)
    return os.path.join(folder_path, chosen_name)

# Work with YOLO

In [None]:
# Bare expression so the notebook displays the `settings` object imported from ultralytics.
settings

In [None]:
# Disable W&B (Weights & Biases) by setting WANDB_MODE in this process's environment.
# NOTE(review): presumably this must run before training starts for the setting
# to take effect — confirm against the W&B documentation.
import os
os.environ['WANDB_MODE'] = 'disabled'

## Training

In [None]:
# Build the model from the project's modified yolo11n-seg architecture YAML.
# NOTE(review): a YAML config rather than a .pt file, so presumably the weights
# start untrained — confirm.
model = YOLO("model/yolo11n-seg_modified.yaml")

### Dataset downloaded from Roboflow Universe: https://universe.roboflow.com/roboflow-universe-projects/buildings-instance-segmentation/dataset/4

In [None]:
# Train on the dataset described by data.yaml for 100 epochs at image size 640.
# NOTE(review): device=0 presumably selects the first CUDA GPU — confirm that the
# runtime actually has one.
train_results = model.train(
    data="data.yaml",
    epochs=100,
    imgsz=640,
    device=0
)

## Evaluation

In [None]:
# Validate the model on the dataset described by data.yaml and keep the returned metrics.
# imgsz is commented out below, so no explicit image size is passed to val().
metrics = model.val(
    data="data.yaml",
    # imgsz=640
)

In [None]:
# Run the model on one randomly chosen test image and show the first result
# using the result object's own show() method.
random_file = get_random_file("test/images")
results = model(random_file)
results[0].show()

In [None]:
# Display the same image file via IPython's Image, without any model annotations.
Image(random_file)

In [None]:
import matplotlib.pyplot as plt
import cv2
import numpy as np
from typing import Tuple, Optional
from ultralytics import YOLO


def _overlay_true_segmentation(original_image, true_segmentation_path):
    """
    Build an RGB copy of the image with the ground-truth polygons tinted blue.

    Each non-empty line of the label file is read as ``class_id x1 y1 x2 y2 ...``
    with coordinates treated as normalized (0 to 1) and scaled by the image size.
    Malformed lines are reported on stdout and skipped.

    Parameters:
    - original_image (np.ndarray): 3-channel BGR image (as loaded by cv2.imread).
    - true_segmentation_path (str): Path to the TXT label file.

    Returns:
    - np.ndarray RGB overlay, or None when the label file cannot be read.
    """
    height, width = original_image.shape[:2]

    # Only the file read is guarded here; per-line problems are handled below so
    # that one bad line does not discard the whole panel.
    try:
        with open(true_segmentation_path, 'r') as file:
            lines = file.readlines()
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error while reading true segmentation file: {e}")
        return None

    true_mask = np.zeros((height, width), dtype=np.uint8)

    for line_num, line in enumerate(lines, start=1):
        tokens = line.strip().split()
        if not tokens:
            continue  # Blank line (e.g. trailing newline): nothing to report

        # A polygon needs a class id plus at least three (x, y) pairs.
        if len(tokens) < 7:
            print(f"Warning: Line {line_num} has insufficient data and will be skipped.")
            continue

        coords = tokens[1:]  # tokens[0] is the class id, which is not used for drawing
        if len(coords) % 2 != 0:
            print(f"Warning: Line {line_num} has an odd number of coordinates and will be skipped.")
            continue

        try:
            # Scale to pixels, truncate, then clamp into the image bounds.
            # int() also rejects NaN/inf, which end up in the same warning.
            x_pixels = [min(max(int(float(x) * width), 0), width - 1) for x in coords[::2]]
            y_pixels = [min(max(int(float(y) * height), 0), height - 1) for y in coords[1::2]]
        except (ValueError, OverflowError):
            print(f"Warning: Line {line_num} contains non-float values and will be skipped.")
            continue

        points = np.array(list(zip(x_pixels, y_pixels)), dtype=np.int32)
        cv2.fillPoly(true_mask, [points], color=255)

    # Channel 0 is blue in OpenCV's BGR layout.
    true_mask_colored = np.zeros_like(original_image)
    true_mask_colored[:, :, 0] = true_mask

    alpha = 0.4  # Weight of the tint added on top of the unmodified image
    blended = cv2.addWeighted(original_image, 1, true_mask_colored, alpha, 0)
    return cv2.cvtColor(blended, cv2.COLOR_BGR2RGB)


def _annotate_detections(original_image, detection, class_names, show_confidence, show_labels):
    """
    Draw boxes, optional labels and red-tinted masks for one result on a copy of the image.

    Parameters:
    - original_image (np.ndarray): 3-channel BGR image the model was run on.
    - detection: A single result object exposing ``boxes`` and ``masks``.
    - class_names: Mapping or sequence from class id to class name.
    - show_confidence (bool): Append the confidence score to the label text.
    - show_labels (bool): Draw the label text above each box.

    Returns:
    - np.ndarray RGB image with the annotations drawn.
    """
    height, width = original_image.shape[:2]
    annotated_image = original_image.copy()

    masks = detection.masks
    mask_polygons = masks.xy if masks is not None and hasattr(masks, 'xy') else []
    boxes = detection.boxes if detection.boxes is not None else []

    for i, box in enumerate(boxes):
        if box.xyxy is None or box.xyxy.shape[0] == 0:
            continue  # No bounding box for this entry
        x1, y1, x2, y2 = [int(coord) for coord in box.xyxy[0].tolist()]

        confidence = box.conf.item() if box.conf is not None else None
        class_id = int(box.cls.item()) if box.cls is not None else None
        label = class_names[class_id] if class_id is not None and class_id < len(class_names) else 'Object'

        # Bounding box in green
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color=(0, 255, 0), thickness=2)

        if show_labels:
            label_text = label
            if show_confidence and confidence is not None:
                label_text += f' {confidence:.2f}'

            (text_width, text_height), baseline = cv2.getTextSize(
                label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
            # Keep the label background inside the image when the box touches the top edge
            y_label = max(y1 - text_height - baseline, 0)
            cv2.rectangle(
                annotated_image,
                (x1, y_label),
                (x1 + text_width, y_label + text_height + baseline),
                (0, 255, 0),
                thickness=cv2.FILLED
            )
            cv2.putText(
                annotated_image,
                label_text,
                (x1, y_label + text_height),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                (0, 0, 0),
                2
            )

        # Overlay the i-th mask polygon, when the result provides one for this box
        if i < len(mask_polygons):
            mask = mask_polygons[i]
            if mask is not None and len(mask) > 0:
                mask_img = np.zeros((height, width), dtype=np.uint8)
                polygon = np.array(mask, dtype=np.int32)
                cv2.fillPoly(mask_img, [polygon], 255)

                # Channel 2 is red in OpenCV's BGR layout.
                colored_mask = np.zeros_like(annotated_image)
                colored_mask[:, :, 2] = mask_img

                alpha = 0.4  # Weight of the tint added on top of the image
                annotated_image = cv2.addWeighted(annotated_image, 1, colored_mask, alpha, 0)

    return cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)


def plot_yolo_results_from_path(
    image_path: str,
    model: YOLO,
    true_segmentation_path: Optional[str] = None,
    show_yolo_detections: bool = True,
    show_confidence: bool = True,
    show_labels: bool = True,
    figsize: Tuple[int, int] = (18, 6),
    conf_threshold: float = 0.25,
    iou_threshold: float = 0.45
) -> None:
    """
    Loads an image from the given path and plots, side by side, the original image,
    optionally the true segmentation regions, and optionally the YOLO detections.

    Parameters:
    - image_path (str): Path to the original image.
    - model (YOLO): The Ultralytics YOLO model instance used for inference.
    - true_segmentation_path (Optional[str]): Path to the TXT file containing true segmentation data.
      When None, or when the file cannot be read, the true-segmentation panel is omitted.
    - show_yolo_detections (bool): Whether to perform YOLO inference and display detections.
    - show_confidence (bool): Whether to display confidence scores on the detections.
    - show_labels (bool): Whether to display class labels on the detections.
    - figsize (tuple): Size of the matplotlib figure.
    - conf_threshold (float): Confidence threshold passed to the model as ``conf``.
    - iou_threshold (float): IoU threshold passed to the model as ``iou``.

    Returns:
    - None. Displays the plot.

    Raises:
    - FileNotFoundError: If the image cannot be loaded from ``image_path``.
    """
    # -------------------------
    # 1. Load the Original Image
    # -------------------------
    # cv2.imread with its default flag yields a 3-channel BGR image.
    original_image = cv2.imread(image_path)
    if original_image is None:
        raise FileNotFoundError(f"Image not found at path: {image_path}")

    original_plot = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

    # (title, RGB image) pairs, in left-to-right plotting order
    images = [("Original Image", original_plot)]

    # -------------------------
    # 2. Load and Parse True Segmentation Data (Optional)
    # -------------------------
    if true_segmentation_path is not None:
        true_segmentation_image = _overlay_true_segmentation(original_image, true_segmentation_path)
        if true_segmentation_image is not None:
            images.append(("True Segmentation", true_segmentation_image))

    # -------------------------
    # 3. Perform YOLO Inference and Annotate Image (Optional)
    # -------------------------
    if show_yolo_detections:
        results = model(original_image, conf=conf_threshold, iou=iou_threshold)

        if results:
            # Single image in, so only the first result is relevant
            annotated_plot = _annotate_detections(
                original_image, results[0], model.names, show_confidence, show_labels)
        else:
            print("Warning: No results returned from the YOLO model.")
            annotated_plot = original_plot  # Fallback to original image

        # Appended in both cases so the panel never silently disappears
        images.append(("YOLO Detections", annotated_plot))

    # -------------------------
    # 4. Plotting with Matplotlib
    # -------------------------
    # squeeze=False always returns a 2-D array of axes, even for a single panel
    _, axes = plt.subplots(1, len(images), figsize=figsize, squeeze=False)

    for ax, (title, img) in zip(axes[0], images):
        ax.imshow(img)
        ax.set_title(title)
        ax.axis('off')

    plt.tight_layout()
    plt.show()


In [None]:
import os

image_path = get_random_file("test/images")
# image_path = get_random_file("vpair_sample/queries")

# Derive the label file path: same base name with a .txt extension, taken from
# the sibling "labels" folder when the image sits in a folder named "images".
# Working on path components (instead of str.replace and slicing off three
# characters) keeps this correct for extensions such as ".jpeg" and for file
# names that themselves contain "images".
image_dir, image_name = os.path.split(image_path)
if os.path.basename(image_dir) == "images":
    label_dir = os.path.join(os.path.dirname(image_dir), "labels")
else:
    label_dir = image_dir
label_path = os.path.join(label_dir, os.path.splitext(image_name)[0] + ".txt")

plot_yolo_results_from_path(
    image_path=image_path,
    model=model,
    true_segmentation_path=label_path,
    show_yolo_detections=True,
    show_confidence=True,
    show_labels=True,
    figsize=(12, 6),
    conf_threshold=0.25,
    iou_threshold=0.7
)

## Calculate stride values for embeddings

In [None]:
# Load the checkpoint stored at model/best.pt; the bare name on the last line
# makes the notebook display the loaded model.
model = YOLO("model/best.pt")
model

In [None]:
import torch
import torch.nn as nn

class ShapeRecorder:
    """Forward hook that remembers the last two dimensions of a layer's output.

    After each forward call ``output_shape`` holds the trailing two dimensions of
    the output tensor (the spatial (H, W) size for a (B, C, H, W) feature map),
    or None when the output is not a tensor with at least two dimensions.
    """

    def __init__(self):
        self.output_shape = None

    def __call__(self, module, module_in, module_out):
        """
        Record the shape of ``module_out``.

        If the module returns a list/tuple (e.g. (features, aux)), only its first
        element is inspected.
        """
        candidate = module_out
        if isinstance(candidate, (list, tuple)):
            candidate = candidate[0] if len(candidate) > 0 else None

        # Tensors with fewer than two dimensions have no (H, W) to report.
        if isinstance(candidate, torch.Tensor) and candidate.dim() >= 2:
            self.output_shape = candidate.shape[-2:]
        else:
            self.output_shape = None


def measure_backbone_strides(yolo_model, input_size=(640, 640)):
    """
    Measure the output size and stride of every top-level layer of a model.

    1) Hooks every direct child of yolo_model.model.model.
    2) Runs one dummy forward pass through yolo_model.model with a
       (1, 3, input_size[0], input_size[1]) random tensor, placed on the same
       device and floating dtype as the model's parameters.
    3) Computes stride as input size / output size along each spatial axis.

    The hooks are always removed again, even when the forward pass fails.
    The forward pass runs in whatever train/eval mode the model is currently in.

    NOTE(review): layers whose output is not a (B, C, H, W) feature map (for
    example a prediction head) still report their last two dimensions; those
    entries are presumably not meaningful as strides — interpret with care.

    Args:
      yolo_model: object whose ``model`` attribute is the network to run and whose
                  ``model.model`` holds the layers to hook.
      input_size: (height, width) of the dummy input.

    Returns:
      strides_list: a list of (layer_index, (H_out, W_out), (stride_h, stride_w))
                    in layer order; layers without a usable output shape are
                    reported as (layer_index, None, None).
    """
    network = yolo_model.model
    modules_list = list(network.model.children())
    recorders = [ShapeRecorder() for _ in modules_list]

    hooks = []
    try:
        for module, recorder in zip(modules_list, recorders):
            hooks.append(module.register_forward_hook(recorder))

        dummy_input = torch.randn(1, 3, input_size[0], input_size[1])

        # Match the model's device/dtype so GPU or half-precision models work too.
        reference = next(network.parameters(), None)
        if reference is not None:
            target_dtype = reference.dtype if reference.is_floating_point() else dummy_input.dtype
            dummy_input = dummy_input.to(device=reference.device, dtype=target_dtype)

        with torch.no_grad():
            network(dummy_input)
    finally:
        # Never leave measurement hooks attached to the caller's model.
        for hook in hooks:
            hook.remove()

    strides_list = []
    for idx, recorder in enumerate(recorders):
        if recorder.output_shape is None:
            strides_list.append((idx, None, None))
            continue

        H_out, W_out = (int(dim) for dim in recorder.output_shape)
        stride_val = (input_size[0] / H_out, input_size[1] / W_out)
        strides_list.append((idx, (H_out, W_out), stride_val))

    return strides_list


In [None]:
# Reload the checkpoint at model/best.pt and report, per top-level layer, the
# output size and stride for the default 640x640 dummy input.
model = YOLO("model/best.pt")
measure_backbone_strides(model)

# Pretty-format confusion matrix

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path


def plot_confusion_matrix(cm,
                          normalize=True,
                          save_dir="",
                          names=(),
                          on_plot=None,
                          text_size=12,
                          cmap="Blues"):
    """
    Plot a confusion matrix with full text-size and colormap control.

    Rows are labelled "Predicted" and columns "True". Entries given as None are
    counted as 0 for colouring/normalization and left without an annotation.

    Args:
        cm (2D list or np.ndarray): Confusion matrix; entries may be numeric or None.
        normalize (bool): Normalize by column; normalized values below 0.005 are hidden.
        save_dir (str): If provided, directory to save the figure (created if missing).
        names (tuple): Class names for ticks. May have one entry per row, or one
            fewer, in which case "background" is appended. Any other length falls
            back to numeric ticks.
        on_plot (callable): Optional callback receiving the saved file path.
        text_size (float or str): Font size for all text (annotations, title, axes, ticks).
        cmap (str or Colormap): Matplotlib colormap for the matrix.
    """
    missing = np.array([[v is None for v in row] for row in cm], dtype=bool)
    cm_numeric = np.array([[0 if v is None else v for v in row] for row in cm], dtype=float)
    n_rows, n_cols = cm_numeric.shape

    if normalize:
        # The epsilon keeps all-zero columns from dividing by zero.
        col_sums = cm_numeric.sum(axis=0, keepdims=True) + 1e-9
        mat = cm_numeric / col_sums
        mat[mat < 0.005] = np.nan  # Hide negligible cells
    else:
        mat = cm_numeric.copy()

    # Largest displayed value, computed once; falls back to 1 when nothing
    # positive remains (e.g. every cell was hidden above).
    finite_values = mat[np.isfinite(mat)]
    max_val = float(finite_values.max()) if finite_values.size and finite_values.max() > 0 else 1

    fig, ax = plt.subplots(tight_layout=True)

    im = ax.imshow(mat,
                   interpolation='nearest',
                   cmap=cmap,
                   vmin=0.0,
                   vmax=max_val)
    cbar = fig.colorbar(im, ax=ax)
    # make colorbar ticks readable
    for lbl in cbar.ax.get_yticklabels():
        lbl.set_fontsize(text_size)

    # Resolve class names into tick labels; None means "use numeric ticks".
    labels = None
    if names is not None and len(names) > 0:
        if len(names) == n_rows:
            labels = list(names)
        elif len(names) == n_rows - 1:
            labels = list(names) + ["background"]

    x_labels = labels if labels is not None and len(labels) == n_cols else np.arange(n_cols)
    y_labels = labels if labels is not None else np.arange(n_rows)

    ax.set_xticks(np.arange(n_cols))
    ax.set_yticks(np.arange(n_rows))
    ax.set_xticklabels(x_labels, fontsize=text_size)
    ax.set_yticklabels(y_labels, fontsize=text_size)

    # axis labels & title
    ax.set_xlabel("True", fontsize=text_size)
    ax.set_ylabel("Predicted", fontsize=text_size)
    title = "Confusion Matrix" + (" Normalized" if normalize else "")
    ax.set_title(title, fontsize=text_size)

    # Cell annotations: white text on the darker half of the colour range.
    thresh = max_val / 2.0
    for i, j in np.ndindex(n_rows, n_cols):
        value = mat[i, j]
        if missing[i, j] or (normalize and np.isnan(value)):
            continue  # No annotation for missing or hidden cells

        txt = f"{value:.2f}" if normalize else f"{cm_numeric[i, j]:.0f}"
        color = "white" if value > thresh else "black"
        ax.text(j, i, txt,
                ha="center", va="center",
                color=color,
                fontsize=text_size)

    # save if requested
    if save_dir:
        out_dir = Path(save_dir)
        out_dir.mkdir(parents=True, exist_ok=True)
        fname = out_dir / f'{title.lower().replace(" ", "_")}.png'
        fig.savefig(fname, dpi=250)
        if on_plot:
            on_plot(fname)

    plt.show()
    plt.close(fig)

In [None]:
# Class labels used for the tick marks on both axes.
class_names = ['Building', 'Background']

# Raw counts; None marks the cell that has no value and is left blank in the plot.
confusion_matrix = [
    [18794, 8462],
    [5628, None],
]

plot_confusion_matrix(
    cm=confusion_matrix,
    normalize=False,
    save_dir="",
    names=class_names,
    text_size=16,
    cmap="Blues",
)