## MASKS -> BOUNDING-BOX JSON ANNOTATIONS (MASTER & PER FOLDER) FOR OBJECT DETECTION

In [3]:
import os
import json
import cv2
import re
from tqdm.notebook import tqdm  # Jupyter-compatible progress bar

# Dataset root directory.
# Set the CRACKS_POTHOLES_DATASET_ROOT environment variable to point at another
# copy of the dataset; when it is unset, the original local path below is used.
root_dir = os.environ.get(
    "CRACKS_POTHOLES_DATASET_ROOT",
    r"D:\internship\gnctd\biankatpas-Cracks-and-Potholes-in-Road-Images-Dataset-1f20054\biankatpas-Cracks-and-Potholes-in-Road-Images-Dataset-1f20054\Dataset",
)

def extract_bounding_boxes(mask_path):
    """Extract one bounding box per outer contour of a mask image.

    The file is read as grayscale and binarised with a threshold of 127
    (``cv2.THRESH_BINARY``), then only the outermost contours are retrieved
    (``cv2.RETR_EXTERNAL``).

    Args:
        mask_path: Path to the mask image file.

    Returns:
        A list of ``[x_min, y_min, x_max, y_max]`` lists, one per contour,
        where ``x_max`` / ``y_max`` are ``x + w`` / ``y + h`` as reported by
        ``cv2.boundingRect``. An empty list is returned when the image cannot
        be read or no contour is found.
    """
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        # cv2.imread returns None (rather than raising) for a missing or
        # unreadable file.
        return []
    _, thresh = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; the contour list is always the
    # second-to-last element, so index instead of unpacking a fixed arity.
    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    bboxes = []
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        bboxes.append([x, y, x + w, y + h])  # Format: [x_min, y_min, x_max, y_max]
    return bboxes

def extract_metadata(folder_name):
    """Pull the state code and highway label out of a dataset folder name.

    The name must contain ``_<STATE>_<digits>_`` where ``<STATE>`` is one of
    ``RS``, ``ES`` or ``DF``.

    Returns:
        ``(state, "BR-<digits>")`` for the first such occurrence, or
        ``(None, None)`` when the pattern is not present.
    """
    found = re.search(r'_(RS|ES|DF)_(\d+)_', folder_name)
    if found is None:
        # Guard clause: nothing recognisable in this folder name.
        return None, None
    state_code, highway = found.group(1, 2)
    return state_code, f"BR-{highway}"

# List to collect all image annotations (one record per processed folder)
dataset_annotations = []

# Sub-folders of the dataset root. Sorted because os.listdir returns entries
# in arbitrary order, which would make the master JSON differ between runs.
all_folders = sorted(
    f for f in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, f))
)

# Folders that could not be fully processed; reported after the loop so the
# progress bar output is not flooded.
folders_missing_raw = []
folders_missing_mask = []

# Loop over all folders with a progress bar
for subdir in tqdm(all_folders, desc="Processing image folders"):
    folder_path = os.path.join(root_dir, subdir)
    state, highway = extract_metadata(subdir)
    if not state or not highway:
        continue  # Skip folders with missing or invalid metadata

    # Files inside a folder are named after the folder itself.
    base_name = subdir
    raw_image_path = os.path.join(folder_path, f"{base_name}_RAW.jpg")
    crack_mask_path = os.path.join(folder_path, f"{base_name}_CRACK.png")
    pothole_mask_path = os.path.join(folder_path, f"{base_name}_POTHOLE.png")

    if not os.path.isfile(raw_image_path):
        # An annotation record that points at a nonexistent image is useless
        # downstream, so do not emit one.
        folders_missing_raw.append(subdir)
        continue

    if not (os.path.isfile(crack_mask_path) and os.path.isfile(pothole_mask_path)):
        # extract_bounding_boxes yields [] for an unreadable mask, which is
        # indistinguishable from "no defects"; remember the folder so the
        # user is told about it. The folder is still annotated as before.
        folders_missing_mask.append(subdir)

    # Crack boxes first, then pothole boxes (same order as the label list).
    labelled_mask_paths = (
        ('crack', crack_mask_path),
        ('pothole', pothole_mask_path),
    )
    image_annotations = {
        'image': raw_image_path,
        'state': state,
        'highway': highway,
        'annotations': [
            {'label': label, 'bbox': bbox}
            for label, mask_path in labelled_mask_paths
            for bbox in extract_bounding_boxes(mask_path)
        ],
    }

    # Add to global annotations list
    dataset_annotations.append(image_annotations)

    # ✅ Also save a per-folder annotation file
    per_folder_json_path = os.path.join(folder_path, "annotation.json")
    with open(per_folder_json_path, "w", encoding="utf-8") as f:
        json.dump(image_annotations, f, indent=2)

# ✅ Save the full dataset annotations as a single JSON file
output_json_path = os.path.join(root_dir, "annotations.json")
with open(output_json_path, "w", encoding="utf-8") as f:
    json.dump(dataset_annotations, f, indent=2)

print(f"✅ Global annotation file saved to: {output_json_path}")
print("✅ Per-folder annotation files also saved.")

if folders_missing_raw:
    print(f"⚠️  Skipped {len(folders_missing_raw)} folder(s) with no RAW image: {folders_missing_raw}")
if folders_missing_mask:
    print(f"⚠️  {len(folders_missing_mask)} folder(s) lack a CRACK or POTHOLE mask: {folders_missing_mask}")

Processing image folders:   0%|          | 0/2235 [00:00<?, ?it/s]

✅ Global annotation file saved to: D:\internship\gnctd\biankatpas-Cracks-and-Potholes-in-Road-Images-Dataset-1f20054\biankatpas-Cracks-and-Potholes-in-Road-Images-Dataset-1f20054\Dataset\annotations.json
✅ Per-folder annotation files also saved.


## GENERATING COMBINED MASKS (POTHOLE + CRACK) PER IMAGE FOR SEMANTIC SEGMENTATION (pothole = 255, crack = 127, background = 0; pothole takes priority where the two masks overlap)

In [5]:
import os
from PIL import Image
import numpy as np
from tqdm.notebook import tqdm  # Jupyter-compatible progress bar

# Dataset root directory.
# Set the CRACKS_POTHOLES_DATASET_ROOT environment variable to point at another
# copy of the dataset; when it is unset, the original local path below is used.
root_dir = os.environ.get(
    "CRACKS_POTHOLES_DATASET_ROOT",
    r"D:\internship\gnctd\biankatpas-Cracks-and-Potholes-in-Road-Images-Dataset-1f20054\biankatpas-Cracks-and-Potholes-in-Road-Images-Dataset-1f20054\Dataset",
)

# Pixel values written into the combined mask (background stays 0)
POTHOLE_VALUE = 255
CRACK_VALUE = 127


def _load_grayscale(path):
    """Read an image file as an 8-bit grayscale array and close the file."""
    with Image.open(path) as img:
        return np.array(img.convert("L"))


def _combine_masks(crack_mask, pothole_mask):
    """Merge two same-shaped masks into one label image.

    Any non-zero pixel counts as foreground. Cracks are painted first and
    potholes second, so potholes win wherever the two masks overlap.
    """
    combined = np.zeros_like(crack_mask, dtype=np.uint8)
    combined[crack_mask > 0] = CRACK_VALUE
    combined[pothole_mask > 0] = POTHOLE_VALUE
    return combined


# List all sub‐folders (sorted: os.listdir order is arbitrary)
all_folders = sorted(
    f for f in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, f))
)

for subdir in tqdm(all_folders, desc="Re‐generating combined masks"):
    folder_path = os.path.join(root_dir, subdir)

    crack_mask_path   = os.path.join(folder_path, f"{subdir}_CRACK.png")
    pothole_mask_path = os.path.join(folder_path, f"{subdir}_POTHOLE.png")

    if not (os.path.exists(crack_mask_path) and os.path.exists(pothole_mask_path)):
        print(f"⚠️  Skipping {subdir}: missing CRACK or POTHOLE mask.")
        continue

    # Load masks as grayscale arrays
    crack_mask   = _load_grayscale(crack_mask_path)
    pothole_mask = _load_grayscale(pothole_mask_path)

    if crack_mask.shape != pothole_mask.shape:
        # Boolean indexing with a differently shaped mask would raise and
        # abort the whole run; skip just this folder instead.
        print(f"⚠️  Skipping {subdir}: CRACK and POTHOLE masks differ in size.")
        continue

    combined = _combine_masks(crack_mask, pothole_mask)

    # Save (overwrite) combined mask in the same folder
    combined_path = os.path.join(folder_path, f"{subdir}_COMBINED.png")
    Image.fromarray(combined).save(combined_path)

Re‐generating combined masks:   0%|          | 0/2235 [00:00<?, ?it/s]