In [None]:
# Turn off the Jedi-based tab completer for this kernel.
%config Completer.use_jedi = False
# Load the autoreload extension; mode 2 re-imports edited modules before each
# cell runs, so changes to local .py files are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from glob import glob

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import supervision as sv
import torch
from groundingdino.util.inference import Model
from segment_anything import sam_model_registry, SamPredictor
from tqdm import tqdm

In [None]:
def plot_image_with_mask(img, mask, alpha=0.4):
    """
    Plot an image with the mask's foreground blended over it in red.

    Args:
    - img (str or numpy array): Path to an image file, or an image array in
      OpenCV's BGR channel order. 2-D grayscale arrays are accepted as well.
    - mask (str or numpy array): Path to a mask file, or a 2-D / multi-channel
      mask array of any numeric or boolean dtype. Every value greater than 0
      is treated as foreground.
    - alpha (float): Weight of the red overlay inside the masked region
      (0 shows the image only, 1 shows solid red).

    Raises:
    - FileNotFoundError: if a path is given and OpenCV cannot read it.
    """
    # cv2.imread returns None instead of raising when a file cannot be read,
    # so turn that into an explicit error here rather than a cryptic one later.
    if isinstance(img, str):
        img_path = img
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")

    if isinstance(mask, str):
        mask_path = mask
        mask = cv2.imread(mask_path)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask: {mask_path}")

    # matplotlib expects RGB; OpenCV delivers BGR (or a single gray channel).
    img = np.asarray(img)
    if img.ndim == 2:
        img_rgb = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    else:
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Collapse any channel axis, then binarise to uint8 so that boolean or
    # int64 masks (which cv2.resize rejects) are handled too.
    mask = np.asarray(mask)
    if mask.ndim == 3:
        mask = mask.max(axis=2)
    foreground = (mask > 0).astype(np.uint8)

    # Nearest-neighbour keeps the resized mask strictly binary; interpolating
    # would smear foreground into neighbouring background pixels.
    height, width = img_rgb.shape[:2]
    if foreground.shape != (height, width):
        foreground = cv2.resize(foreground, (width, height), interpolation=cv2.INTER_NEAREST)

    # Solid red layer, present only where the mask is set.
    colored_mask = np.zeros_like(img_rgb)
    colored_mask[:, :, 0] = foreground * 255

    # Blend everywhere, then keep the blended pixels only inside the mask so
    # the background is shown unchanged instead of being darkened.
    blended = cv2.addWeighted(img_rgb, 1 - alpha, colored_mask, alpha, 0)
    overlay_img = np.where(foreground[:, :, None].astype(bool), blended, img_rgb)

    fig, ax = plt.subplots(figsize=(5, 5))
    ax.imshow(overlay_img)
    ax.set_title("Image with Red Mask Overlay")
    ax.axis('off')
    plt.show()

In [None]:
# glob() returns paths in an OS-dependent order; sorting makes positional
# lookups (e.g. a fixed index or a [:30] subset) reproducible across runs.
masks = sorted(glob("../data/dataset/training_noisy_labels/*png"))
# Each image is expected at the same relative path as its mask, under training_patches.
images = [mask_path.replace("training_noisy_labels", "training_patches") for mask_path in masks]
len(images), len(masks)

In [None]:
# Preview a single image/mask pair and echo the paths that were used.
idx = 100
sample_image_path, sample_mask_path = images[idx], masks[idx]
plot_image_with_mask(sample_image_path, sample_mask_path)
sample_image_path, sample_mask_path

In [None]:
# Grounding DINO: model definition file and pretrained checkpoint
CONFIG_PATH = "../GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
WEIGHTS_PATH = "../weights/groundingdino_swint_ogc.pth"

# Use the GPU when torch can see one, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
def segment(sam_predictor: SamPredictor, image: np.ndarray, xyxy: np.ndarray) -> np.ndarray:
    """
    Produce one SAM mask per box prompt for a single image.

    Args:
    - sam_predictor: predictor the image is registered on via ``set_image``.
    - image: image array handed to ``set_image`` unchanged.
    - xyxy: iterable of box prompts; each item is passed as ``box=`` to ``predict``.

    Returns:
    - numpy array built from the selected mask of every box, in box order
      (an empty array when ``xyxy`` is empty).
    """
    # Register the image once; every box prompt below is answered against it.
    sam_predictor.set_image(image)

    def _best_mask(box_prompt):
        # Keep the highest-scoring candidate among those returned for this box.
        candidates, candidate_scores, _ = sam_predictor.predict(
            box=box_prompt,
            multimask_output=False,
        )
        return candidates[np.argmax(candidate_scores)]

    return np.array([_best_mask(box_prompt) for box_prompt in xyxy])

# Segment Anything: encoder variant and matching checkpoint
SAM_ENCODER_VERSION = "vit_b"
SAM_CHECKPOINT_PATH = "../weights/sam_vit_b_01ec64.pth"

sam = sam_model_registry[SAM_ENCODER_VERSION](checkpoint=SAM_CHECKPOINT_PATH)
# Put SAM on the same device that was selected for Grounding DINO; without
# this the model stays on the CPU even when a GPU is available.
sam.to(device=device)
sam_predictor = SamPredictor(sam)

In [None]:
# Text prompts handed to Grounding DINO
classes = [
    "skyscrapers",
    "buildings",
    "house",
    "warehouses",
    "factory",
    "urban areas",
]

# Box / text confidence cut-offs forwarded to predict_with_classes
BOX_TRESHOLD = 0.10
TEXT_TRESHOLD = 0.10

# Accumulator for per-image (name, IoU) results
iou_list = list()

gd_model = Model(
    model_config_path=CONFIG_PATH,
    model_checkpoint_path=WEIGHTS_PATH,
    device=device,
)

In [None]:
import torch
import cv2

def _to_binary_mask(mask, label):
    """Return `mask` (file path or array-like) as a boolean tensor, True where the value is > 0."""
    if isinstance(mask, str):
        mask_path = mask
        mask = cv2.imread(mask_path)
        if mask is None:
            # cv2.imread reports an unreadable file by returning None.
            raise FileNotFoundError(f"Could not read {label} from: {mask_path}")
    # Binarise first so 0/1 and 0/255 encodings are scored identically.
    return torch.from_numpy(np.asarray(mask) > 0)


def calculate_iou(pred_mask, gt_mask):
    """
    Intersection-over-union between two masks.

    Both masks are binarised (value > 0 means foreground) before scoring, so
    the result does not depend on whether a mask is stored as 0/1 or 0/255.

    Args:
    - pred_mask (str or array-like): predicted mask, or a path to it.
    - gt_mask (str or array-like): ground-truth mask, or a path to it.

    Returns:
    - float: IoU in [0, 1]; 0.0 when both masks are empty (the small epsilon
      in the denominator avoids a division by zero).

    Raises:
    - FileNotFoundError: if a path is given and OpenCV cannot read it.
    - ValueError: if the two masks do not have the same shape.
    """
    pred_binary = _to_binary_mask(pred_mask, "pred_mask")
    gt_binary = _to_binary_mask(gt_mask, "gt_mask")

    if pred_binary.shape != gt_binary.shape:
        raise ValueError(
            f"Mask shapes differ: pred {tuple(pred_binary.shape)} vs gt {tuple(gt_binary.shape)}"
        )

    intersection = torch.sum(pred_binary & gt_binary)
    union = torch.sum(pred_binary) + torch.sum(gt_binary) - intersection

    iou = intersection / (union + 1e-6)

    return iou.item()

***WITHOUT MULTIPROCESSING***


In [None]:
from tqdm import tqdm
import os
import supervision as sv
import cv2
import numpy as np

# Narrower prompt list and stricter cut-offs; these replace the values
# assigned to the same names earlier in the notebook.
classes = [
    "skyscrapers",
    "buildings",
]
BOX_TRESHOLD = TEXT_TRESHOLD = 0.15


In [None]:

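# NOTE: earlier sequential version of the evaluation loop, kept for reference only;
# the same steps are implemented in process_image() further down.
# for img_path , mask_path in tqdm(zip(images[:30],masks[:30]), total=len(images)):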
#     IMAGE_NAME = os.path.basename(img_path)
#     image = cv2.imread(img_path)
#     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#     detections  = gd_model.predict_with_classes(
#         image=image,
#         classes=classes,
#         box_threshold=BOX_TRESHOLD,
#         text_threshold=TEXT_TRESHOLD,
#     )
#     boxes = []
#     for box in detections.xyxy:
#         if ((box[2]-box[0])*(box[3]-box[1])<15000):
#             boxes.append(box)
#     detections.xyxy = np.array(boxes)
#     detections.mask = segment(
#         sam_predictor=sam_predictor,
#         image=cv2.cvtColor(image, cv2.COLOR_BGR2RGB),
#         xyxy=detections.xyxy
#     )
    
#     pred_mask = detections.mask.astype(int)
#     if pred_mask.shape[0]==0:
#         pred_mask = np.zeros((image.shape[0],image.shape[1]))
#     else:
#         pred_mask = np.max(pred_mask , axis = 0)
    
#     pred_mask = np.stack([pred_mask]*3, axis=-1)
#     # output_path = img_path.replace('training_patches', 'pred_masks_sam_dino')
#     # os.makedirs(os.path.dirname(output_path), exist_ok=True)

#     # cv2.imwrite(output_path, pred_mask)
#     iou = calculate_iou(pred_mask , mask_path)
#     img_iou = [IMAGE_NAME , iou]
#     iou_list.append(img_iou)

In [None]:
def process_image(img_path, mask_path, sam_predictor, gd_model, classes, BOX_TRESHOLD, TEXT_TRESHOLD, max_box_area=15000):
    """
    Detect, segment and score a single image against its reference mask.

    Steps: run Grounding DINO with the text prompts, drop boxes whose area is
    not below ``max_box_area``, prompt SAM with the remaining boxes, merge the
    per-box masks into one binary mask and compute its IoU with the mask
    stored at ``mask_path``.

    Args:
    - img_path (str): path of the image to process.
    - mask_path (str): path of the reference mask, forwarded to ``calculate_iou``.
    - sam_predictor: SAM predictor used by ``segment``.
    - gd_model: Grounding DINO wrapper exposing ``predict_with_classes``.
    - classes (list of str): text prompts for the detector.
    - BOX_TRESHOLD (float): box confidence cut-off for the detector.
    - TEXT_TRESHOLD (float): text confidence cut-off for the detector.
    - max_box_area (float): boxes with an area of this many pixels or more
      are discarded before segmentation.

    Returns:
    - tuple: (image file name, IoU as float).

    Raises:
    - FileNotFoundError: if the image cannot be read.
    """
    IMAGE_NAME = os.path.basename(img_path)

    image_bgr = cv2.imread(img_path)
    if image_bgr is None:
        # cv2.imread reports an unreadable file by returning None.
        raise FileNotFoundError(f"Could not read image: {img_path}")

    # The Grounding DINO wrapper takes the image exactly as cv2.imread returns
    # it (BGR) and does its own colour conversion, so no conversion here.
    detections = gd_model.predict_with_classes(
        image=image_bgr,
        classes=classes,
        box_threshold=BOX_TRESHOLD,
        text_threshold=TEXT_TRESHOLD,
    )

    # Keep only boxes below the area limit. A local array is used instead of
    # overwriting detections.xyxy, which would leave the other detection
    # fields at their old length.
    boxes = np.asarray(detections.xyxy).reshape(-1, 4)
    box_areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    small_boxes = boxes[box_areas < max_box_area]

    height, width = image_bgr.shape[:2]
    if len(small_boxes) == 0:
        # Nothing to segment: the prediction is an all-background mask.
        pred_mask = np.zeros((height, width), dtype=int)
    else:
        # SAM is given RGB input; this is the only colour conversion needed.
        # segment() calls set_image itself, so no embedding pre-check is required.
        box_masks = segment(
            sam_predictor=sam_predictor,
            image=cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB),
            xyxy=small_boxes,
        )
        # Union of all per-box masks.
        pred_mask = np.max(box_masks.astype(int), axis=0)

    # Replicate to three channels to match the layout cv2.imread gives the
    # reference mask inside calculate_iou.
    pred_mask = np.stack([pred_mask] * 3, axis=-1)

    iou = calculate_iou(pred_mask, mask_path)

    return IMAGE_NAME, iou

# The threaded driver that calls process_image() is defined in the next cell.


In [None]:
# Parallel processing of images
def parallel_process_images(images, masks, sam_predictor, gd_model, classes, BOX_TRESHOLD, TEXT_TRESHOLD, max_workers=6, limit=30):
    """
    Run ``process_image`` over image/mask pairs through a thread pool.

    Args:
    - images (list of str): image paths.
    - masks (list of str): reference mask paths, aligned with ``images``.
    - sam_predictor, gd_model, classes, BOX_TRESHOLD, TEXT_TRESHOLD:
      forwarded unchanged to ``process_image``.
    - max_workers (int): size of the thread pool.
    - limit (int or None): number of leading pairs to process; ``None``
      processes every pair.

    Returns:
    - list of ``process_image`` results (image name, IoU), in completion order.

    Note:
    - ``segment`` calls ``set_image`` and then ``predict`` on the single
      shared predictor. If those calls interleaved across threads, a box
      prompt could be answered against another image's state, so each
      ``process_image`` call is serialised with a lock. This keeps results
      correct but means the extra threads give no inference speed-up.
    """
    inference_lock = threading.Lock()

    def _process_serialized(img_path, mask_path):
        # Only one image may occupy the shared models at a time.
        with inference_lock:
            return process_image(
                img_path, mask_path, sam_predictor, gd_model, classes, BOX_TRESHOLD, TEXT_TRESHOLD
            )

    path_pairs = list(zip(images[:limit], masks[:limit]))
    iou_list = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(_process_serialized, img_path, mask_path)
            for img_path, mask_path in path_pairs
        ]

        # future.result() re-raises any exception from the worker.
        for future in tqdm(as_completed(futures), total=len(futures)):
            iou_list.append(future.result())

    return iou_list

# Run the threaded evaluation and keep the per-image (name, IoU) results
iou_list = parallel_process_images(
    images=images,
    masks=masks,
    sam_predictor=sam_predictor,
    gd_model=gd_model,
    classes=classes,
    BOX_TRESHOLD=BOX_TRESHOLD,
    TEXT_TRESHOLD=TEXT_TRESHOLD,
)