In [1]:
import cv2
import os
import numpy as np 
import pandas as pd 
from glob import glob
from tqdm import tqdm
import matplotlib.pyplot as plt
from ultralyticsplus import YOLO, render_result 
# pip install ultralyticsplus==0.0.21

## Testing an open source segmentation model

We start by loading an open-source model for the building-segmentation problem and use its predictions to rank the masks. For this experiment, we use the model collection at https://yolov8.xyz/#/?id=about-the-project; search for "building-segmentation" to find the relevant model. Let's load it up.


In [2]:
def plot_image_with_mask(img, mask, alpha=0.4):
    """
    Plots the image with an overlay of the mask in red, resizing the mask to match the image.

    Args:
    - img (str or numpy array): Path to the image file, or an image array in the
      BGR channel order that ``cv2.imread`` returns.
    - mask (str or numpy array): Path to the mask file, or a mask array of shape
      (H, W) or (H, W, C). Values are clipped to [0, 1], so both 0/1 and 0/255
      masks render as a full-strength overlay.
    - alpha (float): Blend weight of the red overlay (the image gets 1 - alpha).

    Raises:
    - FileNotFoundError: If ``img`` or ``mask`` is a path that OpenCV cannot read.
    """

    # If img is a path, read the image file
    if isinstance(img, str):
        img_path = img
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising
        if img is None:
            raise FileNotFoundError(f"Could not read image file: {img_path}")

    # If mask is a path, read the mask file
    if isinstance(mask, str):
        mask_path = mask
        mask = cv2.imread(mask_path)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask file: {mask_path}")

    # cv2.resize does not accept boolean arrays; promote them to 0/1 integers
    mask = np.asarray(mask)
    if mask.dtype == bool:
        mask = mask.astype(np.uint8)

    # Convert image from BGR to RGB for displaying with matplotlib
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Resize the mask to match the image size (cv2 takes the size as (width, height))
    mask_resized = cv2.resize(mask, (img_rgb.shape[1], img_rgb.shape[0]))

    # Collapse the channels by taking the max across them. A single-channel
    # mask (H, W) or (H, W, 1) comes back from cv2.resize as a 2-D array, so
    # there is no channel axis to reduce in that case.
    if mask_resized.ndim == 3:
        mask_resized = np.max(mask_resized, axis=2)
    mask = np.clip(mask_resized, 0, 1)

    # Create a colored mask (Red where the mask is 1, black elsewhere)
    colored_mask = np.zeros_like(img_rgb)  # Same shape as img, but all zeros
    colored_mask[:, :, 0] = mask * 255  # Red channel gets the mask values

    # Overlay the red mask on the original image using alpha blending
    overlay_img = cv2.addWeighted(img_rgb, 1 - alpha, colored_mask, alpha, 0)

    # Plot the image with the red mask overlay
    plt.figure(figsize=(6, 6))
    plt.imshow(overlay_img)
    plt.title("Image with Red Mask Overlay")
    plt.axis('off')
    plt.show()

In [3]:
# Creates a list of all the images and masks in the folder.
# glob() returns paths in arbitrary order, so both lists are sorted to make
# images[i] and masks[i] line up by file name.
# NOTE(review): this assumes a patch and its mask share the same file name — confirm.
masks = sorted(
    glob(
        "/c/Users/syedd/Documents/mine/DCLC3/data/dataset/training_noisy_labels/*png"
    )
)
images = sorted(
    glob(
        "/c/Users/syedd/Documents/mine/DCLC3/data/dataset/training_patches/*png"
    )
)

In [4]:
# Index of the image/mask pair to inspect
idx = 80

# Fail with an actionable message instead of a bare "list index out of range"
# when the glob patterns matched fewer files than expected (e.g. wrong path).
n_pairs = min(len(images), len(masks))
if idx >= n_pairs:
    raise IndexError(
        f"idx={idx} is out of range: found {len(images)} images and "
        f"{len(masks)} masks. Check the dataset paths used in the glob cell."
    )

plot_image_with_mask(images[idx], masks[idx])

IndexError: list index out of range

Task #1: Understand the different models and their hyperparameters. Tune the model to get the best results (they don't have to be perfect). Look at multiple images for this experiment.


In [None]:
# Load the pretrained building-segmentation checkpoint
model = YOLO('keremberke/yolov8m-building-segmentation')

# Baseline inference settings, written into the model's override mapping
model.overrides.update({
    'conf': 0.25,           # NMS confidence threshold
    'iou': 0.45,            # NMS IoU threshold
    'agnostic_nms': False,  # NMS class-agnostic
    'max_det': 1000,        # maximum number of detections per image
})

In [None]:
# perform inference
results = model.predict(images[idx])

if results[0].masks is not None:
    # Move the first tensor axis to the end so the array is laid out (H, W, N);
    # presumably N is one channel per detected instance — confirm.
    pred_mask = results[0].masks.masks.cpu().permute(1,2,0).numpy()
else:
    # Nothing was detected, so there is no mask tensor to read. Fall back to
    # an all-black 3-channel mask sized like the input image, mirroring the
    # handling in the batch-inference loop further down.
    fallback_img = cv2.imread(images[idx])
    if fallback_img is None:
        raise FileNotFoundError(f"Could not read image file: {images[idx]}")
    pred_mask = np.zeros(
        (fallback_img.shape[0], fallback_img.shape[1], 3), dtype=np.uint8
    )

In [None]:
# Note: the prediction mask shape is different from the input image shape.
# Side quest: figure out why.
pred_mask.shape

In [None]:
# Let's see what the predictions are made of: list the distinct values
# that occur in the predicted mask.
np.unique(pred_mask)

In [None]:
# Plot the predicted mask as a red overlay on the image it was predicted from
plot_image_with_mask(images[idx], pred_mask)

## Run Inference using the open source model

Task #2: Once the model is tuned, run inference on all the images to get the masks. We will use these masks for ranking the images later on.

In [55]:
# glob() returns paths in arbitrary order; sort both lists so that pairing
# them by position (zip / shared index) matches files by name.
# NOTE(review): this assumes a patch and its mask share the same file name — confirm.
masks = sorted(glob("/home/contact_mine_ai/DCLC3/data/dataset/training_noisy_labels/*png"))
images = sorted(glob("/home/contact_mine_ai/DCLC3/data/dataset/training_patches/*png"))

In [None]:
# Load the pretrained building-segmentation checkpoint
model = YOLO('keremberke/yolov8m-building-segmentation')

# Tuned inference settings, written into the model's override mapping
model.overrides.update({
    'conf': 0.01,           # NMS confidence threshold
    'iou': 0.45,            # NMS IoU threshold
    'agnostic_nms': False,  # NMS class-agnostic
    'max_det': 100000,      # maximum number of detections per image
})

In [None]:
# Build the (image, mask) pairs once so the progress bar total matches the
# number of items actually iterated; the first 80 pairs are skipped.
pairs = list(zip(images[80:], masks[80:]))

for img_path, msk_path in tqdm(pairs, total=len(pairs)):
    # Perform inference
    results = model.predict(img_path)

    # Check if masks exist in the result
    if results[0].masks is not None and len(results[0].masks.masks) > 0:
        # Merge every predicted mask into one by taking the max over the last axis
        merged = np.max(results[0].masks.masks.cpu().permute(1, 2, 0).numpy(), axis=2)
        # Binarize to uint8 0/255: PNG writing expects 8-bit data, and 0/1
        # values would be saved as a nearly black image.
        # NOTE(review): assumes mask values lie in [0, 1] — confirm with np.unique.
        pred_mask = (merged > 0.5).astype(np.uint8) * 255
        pred_mask = np.stack([pred_mask] * 3, axis=-1)  # Stack to make it 3 channels (RGB)
    else:
        # If no mask is predicted, create an empty (black) mask of the same shape as the input image
        img = cv2.imread(img_path)  # Read the image to get its dimensions
        if img is None:
            # cv2.imread returns None on failure instead of raising
            raise FileNotFoundError(f"Could not read image file: {img_path}")
        pred_mask = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8)  # Create black mask (3-channel)

    # Resize the predicted mask (or empty mask) to 256x256. Nearest-neighbour
    # keeps the mask strictly binary (no blended edge values).
    pred_mask_resized = cv2.resize(pred_mask, (256, 256), interpolation=cv2.INTER_NEAREST)

    # Create the corresponding output path by replacing 'training_patches' with 'pred_masks'
    output_path = img_path.replace('training_patches', 'pred_masks')

    # Ensure the output directory exists (for nested directories)
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Save the resized mask; cv2.imwrite reports failure through its return value
    if not cv2.imwrite(output_path, pred_mask_resized):
        raise IOError(f"Failed to write predicted mask to: {output_path}")

    break  # Debugging aid: stops after the first pair. Remove it to process the full list.

In [None]:
# Inspect the raw outputs of the most recent prediction, then render them
first_result = results[0]
print(first_result.boxes)
print(first_result.masks)
render = render_result(model=model, image=img_path, result=first_result)
render

In [None]:
# Show the resized predicted mask on its own (no image underneath)
plt.imshow(pred_mask_resized)

In [None]:
# Plot the resized predicted mask as a red overlay on its source image
plot_image_with_mask(img_path, pred_mask_resized)

In [None]:
# Plot the dataset mask that the loop paired with this image, for comparison
# against the prediction
plot_image_with_mask(img_path, msk_path)

## Run Eval and Rank the images

Task #3 : Using the predicted masks and actual masks, write a function to calculate the IoU metric between them to rank the images.