### Creating Bounding Box text files for Training dataset:

In [12]:
import os
from PIL import Image

# Paths to images and masks directories
images_dir = r"C:\MSAAI\AAI-521\Final Project Data\YOLO Model Data\S1_images\train"
masks_dir = r"C:\MSAAI\AAI-521\Final Project Data\YOLO Model Data\S1_segmasks\train_segmasks"

# Verify alignment with adjusted mask filename
for image_name in os.listdir(images_dir):
    image_path = os.path.join(images_dir, image_name)
    mask_path = os.path.join(masks_dir, image_name.replace(".jpg", "_segmentation.png"))  # Adjusted naming
    if not os.path.exists(mask_path):
        print(f"Missing mask for {image_name}")
    else:
        print(f"Mask found for {image_name}")

Mask found for akiec
Mask found for bcc
Mask found for bkl
Mask found for df
Mask found for mel
Mask found for nv
Mask found for vasc


In [None]:
# # Recursively process all subdirectories and images
# for root, _, files in os.walk(images_dir):  # Traverse all directories
#     for image_name in files:
#         if image_name.endswith(".jpg"):  # Ensure it's a valid image file
#             # Get the relative path for the mask directory
#             relative_subdir = os.path.relpath(root, images_dir)
#             masks_subdir = os.path.join(masks_dir, relative_subdir)
            
#             # Call the function with the correct paths
#             mask_to_bounding_box(image_name, root, masks_subdir, output_dir)

In [24]:
import os
import cv2
import numpy as np

def mask_to_bounding_box(image_name, images_dir, masks_dir, output_dir, class_id=0):
    """
    Convert a segmentation mask to YOLO bounding box format and save in the appropriate subdirectory.
    """
    # Paths for the image and corresponding mask
    image_path = os.path.join(images_dir, image_name)
    mask_path = os.path.join(masks_dir, image_name.replace(".jpg", "_segmentation.png"))
    
    # Load the mask as a grayscale image
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    
    # Find the non-zero pixels in the mask
    non_zero_indices = np.nonzero(mask)
    
    # Check if the mask is empty
    if len(non_zero_indices[0]) == 0 or len(non_zero_indices[1]) == 0:
        print(f"Warning: Empty mask for {image_name}")
        return
    
    # Calculate bounding box coordinates
    min_y, max_y = np.min(non_zero_indices[0]), np.max(non_zero_indices[0])
    min_x, max_x = np.min(non_zero_indices[1]), np.max(non_zero_indices[1])
    
    # Get the original image dimensions
    img = cv2.imread(image_path)
    img_height, img_width = img.shape[:2]
    
    # Convert bounding box to YOLO format (normalized)
    x_center = (min_x + max_x) / 2 / img_width
    y_center = (min_y + max_y) / 2 / img_height
    width = (max_x - min_x) / img_width
    height = (max_y - min_y) / img_height
    
    # Determine the subdirectory for this label
    subdir = os.path.basename(os.path.dirname(image_path))  # E.g., 'akiec', 'bcc', etc.
    label_subdir = os.path.join(output_dir, subdir)

    # Create the subdirectory if it doesn't exist
    os.makedirs(label_subdir, exist_ok=True)
    
    # Save the label file in the corresponding subdirectory
    output_file = os.path.join(label_subdir, image_name.replace(".jpg", ".txt"))
    with open(output_file, "w") as f:
        f.write(f"{class_id} {x_center} {y_center} {width} {height}\n")
    print(f"Annotation saved for {image_name} in {label_subdir}")

# Paths to directories
images_dir = r"C:/MSAAI/AAI-521/Final Project Data/YOLO Model Data/dataset/images/train"
masks_dir = r"C:/MSAAI/AAI-521/Final Project Data/YOLO Model Data/S1_segmasks/train_segmasks"
output_dir = r"C:/MSAAI/AAI-521/Final Project Data/YOLO Model Data/dataset/labels/train"

# Recursively process all subdirectories and images
for root, _, files in os.walk(images_dir):  # Traverse all directories
    for image_name in files:
        if image_name.endswith(".jpg"):  # Ensure it's a valid image file
            # Get the relative path for the mask directory
            relative_subdir = os.path.relpath(root, images_dir)
            masks_subdir = os.path.join(masks_dir, relative_subdir)
            
            # Call the function with the correct paths
            mask_to_bounding_box(image_name, root, masks_subdir, output_dir)


Annotation saved for ISIC_0024329.jpg in C:/MSAAI/AAI-521/Final Project Data/YOLO Model Data/dataset/labels/train\akiec
Annotation saved for ISIC_0024372.jpg in C:/MSAAI/AAI-521/Final Project Data/YOLO Model Data/dataset/labels/train\akiec
Annotation saved for ISIC_0024418.jpg in C:/MSAAI/AAI-521/Final Project Data/YOLO Model Data/dataset/labels/train\akiec
Annotation saved for ISIC_0024450.jpg in C:/MSAAI/AAI-521/Final Project Data/YOLO Model Data/dataset/labels/train\akiec
Annotation saved for ISIC_0024463.jpg in C:/MSAAI/AAI-521/Final Project Data/YOLO Model Data/dataset/labels/train\akiec
Annotation saved for ISIC_0024468.jpg in C:/MSAAI/AAI-521/Final Project Data/YOLO Model Data/dataset/labels/train\akiec
Annotation saved for ISIC_0024470.jpg in C:/MSAAI/AAI-521/Final Project Data/YOLO Model Data/dataset/labels/train\akiec
Annotation saved for ISIC_0024511.jpg in C:/MSAAI/AAI-521/Final Project Data/YOLO Model Data/dataset/labels/train\akiec
Annotation saved for ISIC_0024517.jpg in

### Creating Bounding Box text files for Validation dataset:

In [None]:
# Paths to images and masks directories
val_images_dir = r"C:\MSAAI\AAI-521\Final Project Data\YOLO Model Data\S1_images\train"
val_masks_dir = r"C:\MSAAI\AAI-521\Final Project Data\YOLO Model Data\S1_segmasks\train_segmasks"

# Verify alignment with adjusted mask filename
for val_image_name in os.listdir(val_images_dir):
    val_image_path = os.path.join(val_images_dir, val_image_name)
    val_mask_path = os.path.join(val_masks_dir, val_image_name.replace(".jpg", "_segmentation.png"))  # Adjusted naming
    if not os.path.exists(val_mask_path):
        print(f"Missing mask for {val_image_name}")
    else:
        print(f"Mask found for {val_image_name}")

In [26]:

# Paths to validation data directories
val_images_dir = r"C:\MSAAI\AAI-521\Final Project Data\YOLO Model Data\dataset\images\val"
val_masks_dir = r"C:\MSAAI\AAI-521\Final Project Data\YOLO Model Data\S1_segmasks\val_segmasks"
val_output_dir = r"C:\MSAAI\AAI-521\Final Project Data\YOLO Model Data\dataset\labels\val"

# Recursively process all subdirectories and images
for root, _, files in os.walk(val_images_dir):  # Traverse all directories
    for image_name in files:
        if image_name.endswith(".jpg"):  # Ensure it's a valid image file
            # Get the relative path for the mask directory
            relative_subdir = os.path.relpath(root, val_images_dir)
            val_masks_subdir = os.path.join(val_masks_dir, relative_subdir)
            
            # Call the function with the correct paths
            mask_to_bounding_box(image_name, root, val_masks_subdir, val_output_dir)


print(f"Images in directory: {os.listdir(val_images_dir)}")
print(f"Images Directory Exists: {os.path.exists(val_images_dir)}")
print(f"Masks Directory Exists: {os.path.exists(val_masks_dir)}")
print(f"Output Directory Exists: {os.path.exists(val_output_dir)}")


Annotation saved for ISIC_0025069.jpg in C:\MSAAI\AAI-521\Final Project Data\YOLO Model Data\dataset\labels\val\akiec
Annotation saved for ISIC_0025539.jpg in C:\MSAAI\AAI-521\Final Project Data\YOLO Model Data\dataset\labels\val\akiec
Annotation saved for ISIC_0025780.jpg in C:\MSAAI\AAI-521\Final Project Data\YOLO Model Data\dataset\labels\val\akiec
Annotation saved for ISIC_0025808.jpg in C:\MSAAI\AAI-521\Final Project Data\YOLO Model Data\dataset\labels\val\akiec
Annotation saved for ISIC_0025825.jpg in C:\MSAAI\AAI-521\Final Project Data\YOLO Model Data\dataset\labels\val\akiec
Annotation saved for ISIC_0025948.jpg in C:\MSAAI\AAI-521\Final Project Data\YOLO Model Data\dataset\labels\val\akiec
Annotation saved for ISIC_0026014.jpg in C:\MSAAI\AAI-521\Final Project Data\YOLO Model Data\dataset\labels\val\akiec
Annotation saved for ISIC_0026194.jpg in C:\MSAAI\AAI-521\Final Project Data\YOLO Model Data\dataset\labels\val\akiec
Annotation saved for ISIC_0026626.jpg in C:\MSAAI\AAI-52

checking that the YAML file is readable:

In [19]:
import yaml
import os

yaml_file = r"C:/MSAAI/AAI-521/Final Project Data/YOLO Model Data/dataset.yaml"

# Load YAML file
with open(yaml_file, 'r') as f:
    data = yaml.safe_load(f)

print(data)

# Check paths
for split in ['train', 'val']:
    path = data[split]
    if not os.path.exists(path):
        print(f"Error: Path {path} does not exist!")
    else:
        print(f"Path {path} exists.")

{'train': 'C:/MSAAI/AAI-521/Final Project Data/YOLO Model Data/dataset/images/train', 'val': 'C:/MSAAI/AAI-521/Final Project Data/YOLO Model Data/dataset/images/val', 'nc': 1, 'names': ['skin_cancer']}
Path C:/MSAAI/AAI-521/Final Project Data/YOLO Model Data/dataset/images/train exists.
Path C:/MSAAI/AAI-521/Final Project Data/YOLO Model Data/dataset/images/val exists.
