## Imports

In [1]:
import pandas as pd
import numpy as np
import cv2
import SimpleITK as sitk
import os
import shutil

## BrEaST data preprocessing: split into `images/` and `labels/` folders

In [26]:
# Raw BrEaST download folder; the two sub-folders below receive the split copies.
src_dir = '../data/mass_data/BrEaST-Lesions_USG-images_and_masks/'
lab_dir = src_dir + 'labels/'   # tumour masks
img_dir = src_dir + 'images/'   # ultrasound images

In [31]:
# Copy the tumour masks (files whose second '_'-separated token is 'tumor.png')
# from the raw dataset folder into lab_dir. Files already present are skipped.
# shutil.copy raises if the destination folder is missing, so create it first.
os.makedirs(lab_dir, exist_ok=True)
for file in os.listdir(path = src_dir):
    tum_det = file.split('_')
    if len(tum_det) > 1  and tum_det[1] == 'tumor.png':
        if not os.path.exists(lab_dir + file):
            shutil.copy(src_dir + file, lab_dir + file)

In [32]:
# Copy the plain images (PNG files with no '_' in their name, i.e. not a mask)
# from the raw dataset folder into img_dir. Files already present are skipped.
# shutil.copy raises if the destination folder is missing, so create it first.
os.makedirs(img_dir, exist_ok=True)
for file in os.listdir(path = src_dir):
    tum_det = file.split('_')
    if len(tum_det) < 2 and file.lower().endswith('.png'):
        if not os.path.exists(img_dir + file):
            shutil.copy(src_dir + file, img_dir + file)

## AIIMS mass data processing for luminal vs. non-luminal categorization

In [17]:
# Source folders, destination folders and the label table of the AIIMS Delhi mass dataset.
aiims_root = '../data/mass_data/AIIMS_Delhi_Mass_Data/'
img_dir = aiims_root + 'images/'
mask_dir = aiims_root + 'labels/'
dest_dir = aiims_root + 'subtype_classification/images/'
dest_masks_dir = aiims_root + 'subtype_classification/masks/'
labels = pd.read_csv(aiims_root + 'available_mass_images_labels.csv')

labels.columns

In [18]:
# Drop rows whose 'ER' value is missing. This mutates `labels` in place, so
# every later cell works on the filtered frame (re-running is a no-op).
labels.dropna(subset = ['ER'], inplace = True)

In [19]:
# Add a 'Luminal' column initialised to 0 for every row.
labels['Luminal'] = 0

In [20]:
# Rows whose ER text begins with 'POSITIVE' (case-sensitive) get Luminal = 1.
er_positive = labels['ER'].str.startswith('POSITIVE')
labels.loc[er_positive, 'Luminal'] = 1

In [24]:
# Cast Patient_id to int so it can be compared with the integer prefix parsed
# from the image/mask filenames in the copy loops below.
labels['Patient_id'] = labels['Patient_id'].astype(int)

In [38]:
(2 == labels['Patient_id']).any()

True

In [41]:
import shutil

# Copy every image whose filename prefix (the text before the first '_' / '.',
# e.g. "2" in "2_1.tif") is a Patient_id present in `labels` into dest_dir.
# Files already present in dest_dir are left untouched.
os.makedirs(dest_dir, exist_ok=True)      # shutil.copy needs the folder to exist
labelled_ids = set(labels['Patient_id'])  # one set lookup per file instead of a Series scan
for file in os.listdir(img_dir):
    try:
        patient_id = int(file.split('.')[0].split('_')[0])
    except ValueError:
        # No numeric prefix (hidden/system files and the like): not a patient image.
        continue
    if patient_id in labelled_ids:
        if not os.path.exists(dest_dir + file):
            shutil.copy(img_dir + file, dest_dir + file)
     

In [42]:
dest_masks_dir

'../data/mass_data/AIIMS_Delhi_Mass_Data/subtype_classification/masks/'

In [43]:
# Copy every mask whose filename prefix (the text before the first '_' / '.')
# is a Patient_id present in `labels` into dest_masks_dir.
# Files already present in dest_masks_dir are left untouched.
os.makedirs(dest_masks_dir, exist_ok=True)  # shutil.copy needs the folder to exist
labelled_ids = set(labels['Patient_id'])    # one set lookup per file instead of a Series scan
for file in os.listdir(mask_dir):
    try:
        patient_id = int(file.split('.')[0].split('_')[0])
    except ValueError:
        # No numeric prefix (hidden/system files and the like): not a patient mask.
        continue
    if patient_id in labelled_ids:
        if not os.path.exists(dest_masks_dir + file):
            shutil.copy(mask_dir + file, dest_masks_dir + file)

In [45]:
import pandas as pd
import os

In [46]:
# Reload the label table from the subtype_classification folder (replaces the
# `labels` frame built from available_mass_images_labels.csv above).
# NOTE(review): no cell shown above writes luminal_labels.csv, and a later cell
# overwrites this same file with the merged table. Confirm where the initial
# file comes from and whether re-running this section is safe.
labels = pd.read_csv('../data/mass_data/AIIMS_Delhi_Mass_Data/subtype_classification/luminal_labels.csv')

In [51]:
dest_dir = '../data/mass_data/AIIMS_Delhi_Mass_Data/subtype_classification/images/'
# One record per copied image: the patient id is the filename text before the
# first '.' and then before the first '_' (kept as a string here).
ls = [
    {'Patient_id': im.split('.')[0].split('_')[0], 'Image_file': im}
    for im in os.listdir(dest_dir)
]

In [52]:
# Image table with an integer Patient_id, ready to be joined with `labels`.
lsdf = pd.DataFrame(ls).astype({'Patient_id': int})

In [56]:
lsdf['Patient_id'].count()

200

In [54]:
labels['Patient_id'].nunique()

77

In [57]:
# Attach every image file to its patient's label row (one label row -> many images).
# validate="one_to_many" raises pandas.errors.MergeError if `labels` holds a
# Patient_id more than once. That is the case when this section is re-run on a
# luminal_labels.csv that already contains the merged table, where a plain merge
# would silently multiply the rows.
final_labels = labels.merge(lsdf, on = 'Patient_id', how = 'left', validate = 'one_to_many')

In [58]:
# Persist the merged table. index=False keeps the RangeIndex out of the file;
# otherwise it comes back as an extra "Unnamed: 0" column when the CSV is
# reloaded with a plain pd.read_csv.
# NOTE(review): this overwrites the file the labels were loaded from.
final_labels.to_csv("../data/mass_data/AIIMS_Delhi_Mass_Data/subtype_classification/luminal_labels.csv", index=False)

In [59]:
len(final_labels)

200

In [1]:
# resizing masks to be the same size as images

In [1]:
import os
from pathlib import Path
import cv2
import numpy as np

# Input/output folders, all under the subtype_classification directory
subtype_root = Path("../data/mass_data/AIIMS_Delhi_Mass_Data/subtype_classification")
img_dir = subtype_root / "images"
lbl_dir = subtype_root / "masks"
out_dir = subtype_root / "resized_masks"

# Make sure the output folder exists (no error if it already does)
out_dir.mkdir(parents=True, exist_ok=True)

# File extensions accepted for images and for masks (compared lower-cased)
img_exts = {".png", ".jpg", ".jpeg", ".tif", ".tiff", ".bmp"}
lbl_exts = set(img_exts)

# Map each mask stem (filename without extension) to the list of mask files
# sharing that stem.
label_index = {}
for p in lbl_dir.iterdir():
    if not p.is_file():
        continue
    if p.suffix.lower() not in lbl_exts:
        continue
    label_index.setdefault(p.stem, []).append(p)

def read_image_cv2(path):
    """Load the image at ``path`` with OpenCV as a 3-channel BGR array.

    Raises:
        RuntimeError: if OpenCV cannot read the file (``cv2.imread`` returned None).
    """
    bgr = cv2.imread(str(path), cv2.IMREAD_COLOR)
    if bgr is not None:
        return bgr
    raise RuntimeError(f"Failed to read image: {path}")

def read_mask_cv2(path):
    """Load the mask at ``path`` with ``cv2.IMREAD_UNCHANGED``.

    The file is returned as stored: OpenCV does not convert it to grayscale or
    to BGR, so the channel count and bit depth are those of the file.

    Raises:
        RuntimeError: if OpenCV cannot read the file (``cv2.imread`` returned None).
    """
    raw_mask = cv2.imread(str(path), cv2.IMREAD_UNCHANGED)
    if raw_mask is not None:
        return raw_mask
    raise RuntimeError(f"Failed to read mask: {path}")

# Process images: for every image in img_dir, find the mask with the same stem,
# bring it to the image's size and write it to out_dir as "<stem>.png".
#   processed - masks written to out_dir
#   resized   - masks that actually had to be resized
#   missing   - stems of images for which no mask was found
processed, missing, resized = 0, [], 0
for img_path in img_dir.iterdir():
    if not (img_path.is_file() and img_path.suffix.lower() in img_exts):
        continue

    stem = img_path.stem

    # Find the masks sharing this image's stem
    cands = label_index.get(stem)
    if not cands:
        missing.append(stem)
        continue
    # Prefer .png if multiple, then by name
    lbl_path = min(cands, key=lambda x: (x.suffix.lower() != ".png", x.name))

    # Read image and assert size
    img = read_image_cv2(img_path)
    h, w = img.shape[:2]
    assert (w, h) == (256, 256), f"Image {img_path.name} is {w}x{h}, expected 256x256."

    # Read mask
    mask = read_mask_cv2(lbl_path)

    # Resize mask if needed (nearest neighbor to preserve labels)
    target_size = (w, h)  # cv2.resize expects (width, height)
    if (mask.shape[1], mask.shape[0]) != target_size:
        mask_resized = cv2.resize(mask, target_size, interpolation=cv2.INTER_NEAREST)
        resized += 1  # count only masks that really were resized
    else:
        mask_resized = mask

    # Ensure output is uint8 (common for label masks).
    # NOTE(review): astype wraps integer values above 255; confirm that no
    # mask is stored with a wider range.
    if mask_resized.dtype != np.uint8:
        mask_resized = mask_resized.astype(np.uint8)

    # Save mask as PNG with same stem as image; imwrite reports failure through
    # its return value, so check it instead of silently losing the file.
    out_path = out_dir / f"{stem}.png"
    if not cv2.imwrite(str(out_path), mask_resized):
        raise RuntimeError(f"Failed to write mask: {out_path}")

    processed += 1

print(f"Processed: {processed}, Resized masks: {resized}, Missing pairs: {len(missing)}")
if missing:
    print("No matching label for:", ", ".join(missing[:20]) + (" ..." if len(missing) > 20 else ""))


Processed: 200, Resized masks: 200, Missing pairs: 0


In [3]:
import cv2
import numpy as np

def show_image_and_mask(image_path, mask_path, alpha=0.5, mask_color=(0, 0, 255), show=True):
    """
    Build an image with its mask drawn as a semi-transparent colored overlay
    and, optionally, display it next to the original in OpenCV windows.

    Args:
        image_path (str or Path): Path to the original image.
        mask_path  (str or Path): Path to the corresponding mask.
        alpha (float): Transparency of the mask overlay (0..1).
        mask_color (tuple): BGR color for the mask overlay.
        show (bool): If True (default), open the "Original" and
            "Image + Mask Overlay" windows and block until a key is pressed.
            Pass False to only compute the overlay (e.g. where no GUI is
            available).

    Returns:
        numpy.ndarray: BGR copy of the image with the mask color blended in
        wherever the mask is > 0.

    Raises:
        ValueError: if the image or the mask cannot be read, or if their
            heights/widths differ.
    """
    # Read original image (BGR)
    image = cv2.imread(str(image_path), cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError(f"Could not read image from {image_path}")

    # Read mask exactly as stored (may be single- or multi-channel)
    mask = cv2.imread(str(mask_path), cv2.IMREAD_UNCHANGED)
    if mask is None:
        raise ValueError(f"Could not read mask from {mask_path}")

    # If mask has multiple channels, convert to single-channel
    if mask.ndim == 3:
        mask_gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
    else:
        mask_gray = mask

    # Ensure same spatial size
    if image.shape[:2] != mask_gray.shape[:2]:
        raise ValueError(
            f"Image and mask must have same size, got {image.shape[:2]} vs {mask_gray.shape[:2]}"
        )

    # Solid-color image used as the second blending input
    color_mask = np.zeros_like(image, dtype=np.uint8)
    color_mask[:] = mask_color  # BGR

    # Create boolean mask: where mask > 0 is foreground
    # Adjust threshold as needed (e.g., > 127) depending on your masks
    mask_binary = mask_gray > 0

    # Blend the whole frame once, then keep the blended pixels only where the
    # mask is set. Blending just the selected pixels would hand empty arrays
    # to cv2.addWeighted when the mask has no foreground at all.
    blended = cv2.addWeighted(image, 1 - alpha, color_mask, alpha, 0)
    overlay = image.copy()
    overlay[mask_binary] = blended[mask_binary]

    if show:
        # Show original and overlay; blocks until a key is pressed
        cv2.imshow("Original", image)
        cv2.imshow("Image + Mask Overlay", overlay)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    # Return the overlay for further use
    return overlay


In [None]:
# Visual sanity check on one image and its resized mask.
example_image_path = "../data/mass_data/AIIMS_Delhi_Mass_Data/subtype_classification/images/2_1.tif"
example_mask_path = "../data/mass_data/AIIMS_Delhi_Mass_Data/subtype_classification/resized_masks/2_1.png"
img = show_image_and_mask(example_image_path, example_mask_path)