# City Shadows: Scene Analysis

This notebook analyzes Google Street View images to identify individual buildings and trees, and records their measurements.

## I. PREPARATION

### ✅ 1. Import Libraries

In [None]:
# Import libraries
import torch
import cv2
import os
import json
import csv
import re
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm 
from PIL import Image
from pathlib import Path

# Instance/Panoptic segmentation
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.utils.colormap import random_color
from detectron2.data import MetadataCatalog

# Depth estimation
from transformers import DPTForDepthEstimation, DPTFeatureExtractor

# new
from sklearn.cluster import KMeans
from sklearn.neighbors import KDTree
from scipy.ndimage import grey_dilation
from scipy.stats import gaussian_kde

In [None]:
# Report the installed PyTorch build and whether CUDA is usable.
# This cell only prints diagnostics; it does not select a device itself.
print("Torch version:", torch.__version__)
print("CUDA Available:", torch.cuda.is_available())
print("CUDA Version:", torch.version.cuda)
print("Device Count:", torch.cuda.device_count())

### ✅ 2. Define Paths

In [None]:
# Define paths (all relative to the notebook's working directory)

# Inputs
input_ungrouped_path = "../data/input/input_makati_ungrouped" # ungrouped images
input_grouped_path = "../data/input/input_makati_grouped" # grouped images by coordinates
input_vanishing_path = "../data/input/input_vanishing_reference"

# Per-stage outputs
output_depth_path = "../data/output/output_depth"
output_semantic_path = "../data/output/output_semantic"
output_instance_path = "../data/output/output_instance"
output_panoptic_path = "../data/output/output_panoptic"
output_json_path = "../data/output/output_json"
output_final_json_path = "../data/output/output_final_json"

# Locally saved model artifacts
save_depth_path = "../data/save/save_depth"
save_panoptic_path = "../data/save/save_panoptic"
save_semantic_path = "../data/save/save_semantic"
save_instance_path = "../data/save/save_instance"

# Class-name -> color lookup file
color_map_path = "../data/color_map/color_map_modified.txt"

### ✅ 3. Group GSV Images by Coordinates

In [None]:
# Group images into folders by their coordinates.
# Each image is copied into input_grouped_path/<prefix>/, where <prefix> is the
# part of the file name before the first underscore, and the original is then
# removed from input_ungrouped_path.
image_list = [f for f in os.listdir(input_ungrouped_path) if f.lower().endswith((".png", ".jpg", ".jpeg"))]
total_images = len(image_list)

for i, image_name in enumerate(tqdm(image_list, desc="Processing images")):
    image_path = os.path.join(input_ungrouped_path, image_name)
    image = cv2.imread(image_path)

    # cv2.imread signals an unreadable/corrupt file by returning None instead
    # of raising, so skip such files and leave them where they are.
    if image is None:
        print(f"Skipping unreadable image: {image_path}")
        continue

    # Extract folder name (string before the underscore)
    folder_name = image_name.split("_")[0]
    folder_path = os.path.join(input_grouped_path, folder_name)

    # Create the folder if it doesn't exist
    os.makedirs(folder_path, exist_ok=True)

    # Save a copy of the image inside a folder of the input_grouped_path
    output_path = os.path.join(folder_path, image_name)
    written = cv2.imwrite(output_path, image)

    # Only delete the source once the copy is known to exist; cv2.imwrite
    # reports failure through its return value rather than an exception.
    if not written:
        print(f"Could not write {output_path}; keeping original {image_path}")
        continue

    # Delete the original image from the input_ungrouped_path
    os.remove(image_path) #optional

In [None]:
# List folders with fewer than 8 images.
# Scans the immediate sub-folders of input_grouped_path, counts the image files
# in each one, and prints every folder whose count is below 8.
parent_dir = input_grouped_path
image_extensions = (".png", ".jpg", ".jpeg")
folders_with_few_images = []  # (folder name, image count) pairs

for folder_name in os.listdir(parent_dir):
    folder_path = os.path.join(parent_dir, folder_name)
    if os.path.isdir(folder_path):
        # Extension match is case-insensitive
        image_files = [f for f in os.listdir(folder_path) if f.lower().endswith(image_extensions)]
        if len(image_files) < 8:
            folders_with_few_images.append((folder_name, len(image_files)))

for folder, count in folders_with_few_images:
    print(f"{folder}: {count} image(s)")

In [None]:
# Disabled cell: everything below is a single string literal, so none of it runs.
# The text holds a helper that would prefix file names under a folder tree.
'''
# For testing purposes only
def rename_files_with_prefix(root_folder, prefix="semantic_"):
    # Traverse all subfolders
    for subdir, _, files in tqdm(os.walk(root_folder), desc="Renaming files"):
        for filename in files:
            # Skip if already starts with prefix
            if filename.startswith(prefix):
                continue

            # Construct full paths
            old_path = os.path.join(subdir, filename)
            new_filename = prefix + filename
            new_path = os.path.join(subdir, new_filename)

            # Rename file
            os.rename(old_path, new_path)

# Example usage
root_folder = output_semantic_path
rename_files_with_prefix(root_folder)
'''

## II. PANOPTIC-SEMANTIC SEGMENTATION

### ✅ 1. Save Panoptic Segmentation Model

In [None]:
# Save Detectron2 configuration and weights reference.
# Builds the COCO panoptic FPN (R50, 3x) config from the model zoo and writes
# it, together with the configured weights entry, under save_panoptic_path.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml")

# Save the config
os.makedirs(save_panoptic_path, exist_ok=True)
with open(f"{save_panoptic_path}/config.yaml", "w") as f:
    f.write(cfg.dump())

# Save model weights
# NOTE(review): cfg.MODEL.WEIGHTS holds whatever get_checkpoint_url returned, so
# model.pth stores that value rather than downloaded parameters -- confirm this
# is intended. The loading cell reads the same value back with torch.load.
torch.save(cfg.MODEL.WEIGHTS, f"{save_panoptic_path}/model.pth")
print(f"Model config and weights saved to {save_panoptic_path}")

### ✅ 2. Load Panoptic Segmentation Model

In [None]:
# Load Detectron2 model.
# Rebuilds the config from the files written by the save cell, creates the
# predictor, and prints the class lists of the first training dataset.
cfg = get_cfg()
cfg.merge_from_file(f"{save_panoptic_path}/config.yaml")
cfg.MODEL.WEIGHTS = torch.load(f"{save_panoptic_path}/model.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 # confidence threshold
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

panoptic_model = DefaultPredictor(cfg)

print("Detectron2 loaded successfully!")
print("Detectron2 is using:", cfg.MODEL.DEVICE)

# Use the dataset's labels
if cfg.DATASETS.TRAIN:
    dataset_name = cfg.DATASETS.TRAIN[0]
    coco_metadata = MetadataCatalog.get(dataset_name)
    print("Dataset name:", dataset_name)
    
    # NOTE(review): these two lines read thing_classes/stuff_classes before the
    # hasattr() guards below, so the guards cannot protect them.
    print("Number of thing classes:", len(coco_metadata.thing_classes))
    print("Number of stuff classes:", len(coco_metadata.stuff_classes))
    
    # Print thing classes with contiguous IDs
    if hasattr(coco_metadata, "thing_classes"):
        print("\nThing Classes:")
        for contiguous_id, class_name in enumerate(coco_metadata.thing_classes):
            color = coco_metadata.thing_colors[contiguous_id] if hasattr(coco_metadata, "thing_colors") else "N/A"
            print(f"- ID {contiguous_id}: {class_name}, Color: {color}")
    
    # Print stuff classes with contiguous IDs
    if hasattr(coco_metadata, "stuff_classes"):
        print("\nStuff Classes:")
        for contiguous_id, class_name in enumerate(coco_metadata.stuff_classes):
            color = coco_metadata.stuff_colors[contiguous_id] if hasattr(coco_metadata, "stuff_colors") else "N/A"
            print(f"- ID {contiguous_id}: {class_name}, Color: {color}")
else:
    # Later cells that pass coco_metadata on receive None in this case
    print("Warning: No dataset is registered in cfg.DATASETS.TRAIN")
    coco_metadata = None

### ✅ 3. Run Panoptic Segmentation as Semantic Segmentation

In [None]:
# Disabled cell: everything below is a single string literal, so none of it runs.
# The text holds the batch job that would write color-coded semantic masks.
'''
# Perform Panoptic Segmentation and Extract "Stuff" for Semantic Segmentation
image_list = []
for root, _, files in os.walk(input_grouped_path):
    for f in files:
        if f.lower().endswith((".png", ".jpg", ".jpeg")):
            image_list.append(os.path.join(root, f))

total_images = len(image_list)

for i, image_path in enumerate(tqdm(image_list, desc="Processing images")):
    image = cv2.imread(image_path)

    # Extract filename and folder name
    image_name = os.path.basename(image_path)
    folder_name = image_name.split("_")[0]  # or os.path.basename(os.path.dirname(image_path))
    folder_path = os.path.join(output_semantic_path, folder_name)
    os.makedirs(folder_path, exist_ok=True)
    
    # Run panoptic segmentation
    outputs = panoptic_model(image)    
    panoptic_seg, segments_info = outputs["panoptic_seg"]
    
    # Convert to numpy once
    panoptic_seg_np = panoptic_seg.cpu().numpy()
    
    # Generate a blank semantic segmentation mask (BGR)
    semantic_mask = np.zeros((panoptic_seg_np.shape[0], panoptic_seg_np.shape[1], 3), dtype=np.uint8)
    
    # Assign COCO metadata colors
    for segment in segments_info:
        segment_id = segment["id"]
        category_id = segment["category_id"]
        
        # Use numpy boolean indexing instead of comparison
        mask = panoptic_seg_np == segment_id
        
        if segment["isthing"]:
            color = coco_metadata.thing_colors[category_id]
        else:
            color = coco_metadata.stuff_colors[category_id]
            
        semantic_mask[mask] = color[::-1]  # Converts to BGR
    
    # Save segmented image inside the grouped folder
    output_path = os.path.join(folder_path, f"semantic_{image_name}")
    cv2.imwrite(output_path, semantic_mask)
    '''

### ⛔ 4. Visualize Panoptic Segmentation (FOR TESTING PURPOSES ONLY)

In [None]:
# Perform panoptic segmentation on every grouped image and display the result.
# Collect all image paths from the sub-folders of input_grouped_path.
image_list = []
for root, _, files in os.walk(input_grouped_path):
    for f in files:
        if f.lower().endswith((".png", ".jpg", ".jpeg")):
            image_list.append(os.path.join(root, f))

total_images = len(image_list)

for i, image_path in enumerate(tqdm(image_list, desc="Processing images")):
    # NOTE(review): cv2.imread returns None for unreadable files; that case is
    # not handled before the image is passed to the model.
    image = cv2.imread(image_path)

    # Extract filename
    image_name = os.path.basename(image_path)
    
    # Run panoptic segmentation
    outputs = panoptic_model(image)    

    # Confirm if output tensors are on GPU
    for k, v in outputs.items():
        if hasattr(v, 'device'):
            print(f"{k} is on {v.device}")

    # Draw panoptic segmentation image
    # (v is rebound here; the loop variable of the same name is no longer needed)
    panoptic_seg, segments_info = outputs["panoptic_seg"]
    # image[:, :, ::-1] reverses the channel order of the array loaded by cv2.imread
    v = Visualizer(image[:, :, ::-1], metadata=coco_metadata, scale=1,instance_mode=ColorMode.SEGMENTATION)
    output_image = v.draw_panoptic_seg_predictions(panoptic_seg.to("cpu"), segments_info).get_image()

    # Show image in Jupyter Notebook
    # NOTE(review): this label has no underscore after "panoptic", unlike the
    # "depth_" label used by the depth visualization cell -- confirm.
    print(f"panoptic{image_name}")
    plt.figure(figsize=(5, 3))
    plt.imshow(output_image)
    plt.axis("off")  # Hide axes
    plt.show()

## III. DEPTH SENSING

### ✅ 1. Save Depth Sensing Model

In [None]:
# Save DPT model.
# Loads the pretrained DPT depth model and its feature extractor by name and
# stores local copies of both under save_depth_path for the loading cell.
depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas")
depth_processor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas")

# Create the directory the artifacts are actually written to. The previous
# version created save_instance_path here, which this cell never writes to.
os.makedirs(save_depth_path, exist_ok=True)
depth_model.save_pretrained(save_depth_path)
depth_processor.save_pretrained(save_depth_path)
print(f"Model saved to {save_depth_path}")

### ✅ 2. Load Depth Sensing Model

In [None]:
# Load DPT model.
# Reads the model and feature extractor back from save_depth_path, moves the
# model to the GPU when CUDA is available (CPU otherwise), and switches it to
# evaluation mode.
depth_model = DPTForDepthEstimation.from_pretrained(save_depth_path)
depth_processor = DPTFeatureExtractor.from_pretrained(save_depth_path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
depth_model.to(device)
depth_model.eval()

print("DPT loaded successfully!")
print("DPT is using:", device)

### ✅ 3. Run Depth Estimation

In [None]:
# Disabled cell: everything below is a single string literal, so none of it runs.
# The text holds the batch job that would write normalized depth maps to disk.
'''
# Perform Depth Estimation
# Get all image paths from subfolders of input_grouped_path
image_list = []
for root, _, files in os.walk(input_grouped_path):
    for f in files:
        if f.lower().endswith((".png", ".jpg", ".jpeg")):
            image_list.append(os.path.join(root, f))

total_images = len(image_list)

for i, image_path in enumerate(tqdm(image_list, desc="Processing images")):
    image = cv2.imread(image_path)

    # Extract filename and folder name
    image_name = os.path.basename(image_path)
    folder_name = image_name.split("_")[0]  # or os.path.basename(os.path.dirname(image_path))
    folder_path = os.path.join(output_depth_path, folder_name)
    os.makedirs(folder_path, exist_ok=True)

    # Convert BGR to RGB
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Preprocess and move to device
    inputs = depth_processor(images=image_rgb, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Run depth estimation
    with torch.no_grad():
        depth_map = depth_model(**inputs).predicted_depth

    # Normalize and format
    depth_map = depth_map.squeeze().cpu().numpy()
    depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
    depth_map = (depth_map * 255).astype(np.uint8)

    # Resize to match original
    output_image = cv2.resize(depth_map, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_CUBIC)

    # Save output
    output_path = os.path.join(folder_path, f"depth_{image_name}")
    cv2.imwrite(output_path, output_image)

    # Optional: delete original
    # os.remove(image_path)
'''

### ⛔ 4. Visualize Depth Estimation (FOR TESTING PURPOSES ONLY)

In [None]:
# Perform and visualize depth estimation for every grouped image.
# Get all image paths from subfolders of input_grouped_path
image_list = []
for root, _, files in os.walk(input_grouped_path):
    for f in files:
        if f.lower().endswith((".png", ".jpg", ".jpeg")):
            image_list.append(os.path.join(root, f))

total_images = len(image_list)

for i, image_path in enumerate(tqdm(image_list, desc="Processing images")):
    # NOTE(review): cv2.imread returns None for unreadable files; that case is
    # not handled before the color conversion below.
    image = cv2.imread(image_path)

    # Extract filename
    image_name = os.path.basename(image_path)

    # Convert BGR to RGB
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Preprocess and move to device
    inputs = depth_processor(images=image_rgb, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Run depth estimation (no gradients needed for inference)
    with torch.no_grad():
        depth_map = depth_model(**inputs).predicted_depth

    # Normalize to [0, 1] per image, then scale to 8-bit.
    # NOTE(review): a constant prediction makes max - min zero, so the division
    # would produce non-finite values -- confirm this cannot occur in practice.
    depth_map = depth_map.squeeze().cpu().numpy()
    depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
    depth_map = (depth_map * 255).astype(np.uint8)

    # Resize to match original (cv2.resize takes the size as (width, height))
    output_image = cv2.resize(depth_map, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_CUBIC)

    # Show image in Jupyter Notebook
    print(f"depth_{image_name}")
    plt.figure(figsize=(6, 4))
    plt.imshow(output_image, cmap="magma")
    plt.axis("off")
    plt.show()

## IV. HEIGHT, WIDTH, AND DISTANCE ESTIMATION

### ✅  1. Define Helper Functions

a. Converting a hex color code to an RGB tuple

In [None]:
def hex_to_rgb(hex_color):
    """Convert a hex color string such as ``'#ff8000'`` to an ``(r, g, b)`` tuple.

    Leading ``#`` characters are stripped and the first six hex digits are read
    as three two-digit channels. Raises ``ValueError`` when a channel cannot be
    parsed as hexadecimal (for example, when the string is too short).
    """
    digits = hex_color.lstrip('#')
    channels = []
    for start in range(0, 6, 2):
        channels.append(int(digits[start:start + 2], 16))
    return tuple(channels)

b. Post-process semantic images

In [None]:
def is_thin_shape(component_mask, max_thickness=2):
    """Tell whether a component vanishes within ``max_thickness`` erosions.

    The mask is binarized (non-zero -> 1) and eroded repeatedly with a 3x3
    kernel. Returns ``True`` if nothing is left after at most
    ``max_thickness`` passes, ``False`` as soon as more passes are needed.
    """
    kernel = np.ones((3, 3), np.uint8)
    remaining = (component_mask > 0).astype(np.uint8)
    erosions = 0
    while remaining.any():
        remaining = cv2.erode(remaining, kernel)
        erosions += 1
        if erosions > max_thickness:
            # Still has pixels after the allowed number of passes: not thin
            return False
    return True

In [None]:
def classes_to_black(img, color_map, classes_to_remove):
    """Return a copy of ``img`` with the given classes painted black.

    ``color_map`` maps class names to hex colors. Pixels that exactly match
    the color of a class in ``classes_to_remove`` are set to ``[0, 0, 0]``.
    Class names missing from ``color_map`` are ignored. ``img`` itself is not
    modified.
    """
    result = img.copy()
    known_classes = (name for name in classes_to_remove if name in color_map)

    for name in known_classes:
        class_rgb = np.array(hex_to_rgb(color_map[name]))
        # Exact match on all three channels
        result[np.all(result == class_rgb, axis=2)] = [0, 0, 0]

    return result

In [None]:
def clean_semantic(img_rgb, color_map, tolerance=40, min_area=30, max_thickness=3):
    """Snap a semantic image to the known class palette and drop small noise.

    Steps:
      1. Every pixel is replaced by the closest palette color (sum of absolute
         channel differences) when that distance is below ``tolerance``;
         otherwise it becomes black.
      2. For each palette color, 8-connected components that are smaller than
         ``min_area`` pixels or that ``is_thin_shape`` reports as thin are
         painted black.
      3. The 'awning', 'canopy' and 'bridge' classes are painted black.

    Args:
        img_rgb: H x W x 3 image array.
        color_map: mapping of class name -> hex color string.
        tolerance: exclusive upper bound on the summed channel difference.
        min_area: minimum component area (pixels) to keep.
        max_thickness: forwarded to ``is_thin_shape``.

    Returns:
        The cleaned image, same shape as ``img_rgb``.
    """
    height, width, _ = img_rgb.shape

    # Palette as a (num_classes, 3) array, in color_map order
    palette = np.array([np.array(hex_to_rgb(code)) for code in color_map.values()])

    # Distance from every pixel to every palette entry: shape (N, C)
    pixels = img_rgb.reshape(-1, 3)
    distance = np.abs(pixels[:, None, :] - palette[None, :, :]).sum(axis=2)
    nearest = distance.argmin(axis=1)
    close_enough = distance.min(axis=1) < tolerance

    # Unmatched pixels stay black
    snapped = np.zeros_like(img_rgb)
    snapped_rows = snapped.reshape(-1, 3)
    snapped_rows[close_enough] = palette[nearest[close_enough]]
    snapped = snapped.reshape(height, width, 3)

    # Remove small or thin structures, one class color at a time
    for class_rgb in palette:
        class_mask = np.all(snapped == class_rgb, axis=2).astype(np.uint8)
        count, labels, stats, _ = cv2.connectedComponentsWithStats(class_mask, connectivity=8)

        for label in range(1, count):  # label 0 is the background
            component = labels == label
            too_small = stats[label, cv2.CC_STAT_AREA] < min_area
            if too_small or is_thin_shape(component.astype(np.uint8), max_thickness=max_thickness):
                snapped[component] = [0, 0, 0]

    # Awning, canopy and bridge are always blanked out
    return classes_to_black(snapped, color_map, ['awning', 'canopy', 'bridge'])


In [None]:
def inpaint_semantic(mask_rgb, vanishing_lines, color_map, max_iters=1000):
    """Fill the black (unlabelled) pixels of a semantic image.

    The image is packed into one integer per pixel (``r << 16 | g << 8 | b``),
    so black is 0. Filling happens in three stages:

      1. Inside every wedge between two neighbouring vanishing lines, black
         pixels directly above a sky pixel become sky.
      2. Inside the same wedges, black pixels directly below a floor-type
         pixel take that pixel's color.
      3. Whatever is still black is filled by repeated 3x3 grey dilation of
         the packed values, so the neighbour with the largest packed value
         wins.

    Args:
        mask_rgb: H x W x 3 semantic image; it is not modified.
        vanishing_lines: iterable of ``((x, y), (x, y))`` point pairs.
        color_map: mapping of class name -> hex color; must contain 'sky'.
        max_iters: upper bound on stage-3 dilation passes.

    Returns:
        H x W x 3 ``uint8`` array with the filled image.
    """
    H, W = mask_rgb.shape[:2]

    # Pack RGB into a single integer per pixel (astype already copies the data)
    color_encoded = (mask_rgb[:, :, 0].astype(np.int32) << 16) + \
                    (mask_rgb[:, :, 1].astype(np.int32) << 8) + \
                    mask_rgb[:, :, 2].astype(np.int32)

    # Lines ordered by the x coordinate of their first point
    lines_sorted = sorted(vanishing_lines, key=lambda line: line[0][0])

    def region_between(left_line, right_line):
        """Rasterize the quadrilateral spanned by two neighbouring lines."""
        (x1a, y1a), (x1b, y1b) = left_line
        (x2a, y2a), (x2b, y2b) = right_line
        poly = np.array([[x1a, y1a], [x1b, y1b], [x2b, y2b], [x2a, y2a]], dtype=np.int32)
        region_mask = np.zeros((H, W), dtype=np.uint8)
        cv2.fillPoly(region_mask, [poly], 1)
        return region_mask

    # === Stage 1: Pre-fill black pixels above sky in vanishing regions ===
    sky_rgb = hex_to_rgb(color_map['sky'])
    sky_val = (sky_rgb[0] << 16) + (sky_rgb[1] << 8) + sky_rgb[2]

    for i in range(len(lines_sorted) - 1):
        region_mask = region_between(lines_sorted[i], lines_sorted[i + 1])

        sky_pixels = np.logical_and(region_mask == 1, color_encoded == sky_val)
        ys, xs = np.where(sky_pixels)
        for x, y in zip(xs, ys):
            # Walk upward until leaving the wedge or hitting a labelled pixel
            for y_up in range(y - 1, -1, -1):
                if not region_mask[y_up, x] or color_encoded[y_up, x] != 0:
                    break
                color_encoded[y_up, x] = sky_val

    # === Stage 2: Pre-fill black pixels below floor in vanishing regions ===
    floor_classes = [
        'sidewalk', 'pavement', 'road', 'route', 'floor', 'flooring',
        'ground', 'earth', 'field', 'runway', 'land', 'soil',
        'sand', 'dirt', 'track', 'path', 'rock', 'stone'
    ]
    floor_colors = [hex_to_rgb(color_map[c]) for c in floor_classes if c in color_map]
    floor_vals = list(set((r << 16) + (g << 8) + b for r, g, b in floor_colors))

    for i in range(len(lines_sorted) - 1):
        region_mask = region_between(lines_sorted[i], lines_sorted[i + 1])

        floor_pixels = np.logical_and(region_mask == 1, np.isin(color_encoded, floor_vals))
        ys, xs = np.where(floor_pixels)
        for x, y in zip(xs, ys):
            val = color_encoded[y, x]
            # Walk downward until leaving the wedge or hitting a labelled pixel
            for y_down in range(y + 1, H):
                if not region_mask[y_down, x] or color_encoded[y_down, x] != 0:
                    break
                color_encoded[y_down, x] = val

    # === Stage 3: General isotropic inpainting ===
    black_mask = (color_encoded == 0)
    for _ in range(max_iters):
        if not np.any(black_mask):
            break
        dilated = grey_dilation(color_encoded, size=(3, 3))
        replacement_mask = black_mask & (dilated != 0)
        if not np.any(replacement_mask):
            # Nothing can be filled (e.g. the image is entirely black). Further
            # passes would leave the image unchanged, so stop instead of
            # repeating the dilation up to max_iters times.
            break
        color_encoded[replacement_mask] = dilated[replacement_mask]
        black_mask = (color_encoded == 0)

    # Decode to RGB
    r = (color_encoded >> 16) & 255
    g = (color_encoded >> 8) & 255
    b = color_encoded & 255
    result_rgb = np.stack([r, g, b], axis=2).astype(np.uint8)

    return result_rgb


In [None]:
def has_ceiling(img):
    """Report whether the top quarter of the image is at least 20% black.

    ``img`` is converted with ``np.array`` (no color conversion is done). The
    rows ``[0, height // 4)`` are inspected, and a pixel counts as black when
    all of its channels are 0. Returns a truthy value when the black fraction
    is >= 0.20.
    """
    pixels = np.array(img)

    # Top 1/4 of the rows
    band = pixels[:pixels.shape[0] // 4, :, :]

    # True where every channel of the pixel is 0
    is_black = (band == [0, 0, 0]).all(axis=-1)

    return is_black.sum() / is_black.size >= 0.20

In [None]:
def has_ceiling_2(img, color_map, vanishing_lines):
    """Check whether most sky pixels lie below the top of a structure.

    The image is converted to a label map (one label per ``color_map`` color).
    For every wedge between two neighbouring vanishing lines, the topmost row
    containing a building/edifice/house/skyscraper/tower/wall pixel is found,
    and the sky pixels of that wedge lying strictly below that row are
    counted.

    Args:
        img: H x W x 3 semantic image whose colors match ``color_map`` exactly.
        color_map: mapping of class name -> hex color; must contain 'sky'.
        vanishing_lines: iterable of ``((x, y), (x, y))`` point pairs.

    Returns:
        ``False`` when the image has no sky pixels at all; otherwise whether
        the counted sky pixels make up at least 50% of all sky pixels.
    """
    height, width = img.shape[:2]
    relevant_classes = {'building', 'edifice', 'house', 'skyscraper', 'tower', 'wall'}

    # RGB tuple -> label number (1-based, in color_map order). When two
    # classes share a color, the later label replaces the earlier one.
    color_to_label = {
        hex_to_rgb(hex_color): number
        for number, hex_color in enumerate(color_map.values(), start=1)
    }
    sky_label = color_to_label[hex_to_rgb(color_map['sky'])]

    # Label image: 0 where the pixel matches no known color
    label_img = np.zeros((height, width), dtype=np.uint16)
    for rgb, number in color_to_label.items():
        label_img[np.all(img == rgb, axis=2)] = number

    relevant_labels = list({
        color_to_label[hex_to_rgb(color_map[name])]
        for name in relevant_classes if name in color_map
    })

    is_sky = label_img == sky_label
    total_sky = np.sum(is_sky)
    if total_sky == 0:
        return False

    # Lines ordered by the x coordinate of their first point
    lines_sorted = sorted(vanishing_lines, key=lambda line: line[0][0])

    sky_below_total = 0
    for left_line, right_line in zip(lines_sorted, lines_sorted[1:]):
        (x1a, y1a), (x1b, y1b) = left_line
        (x2a, y2a), (x2b, y2b) = right_line

        poly = np.array([[x1a, y1a], [x1b, y1b], [x2b, y2b], [x2a, y2a]], dtype=np.int32)
        region_mask = np.zeros((height, width), dtype=np.uint8)
        cv2.fillPoly(region_mask, [poly], 1)
        in_region = region_mask == 1

        # Topmost row (smallest y) with a structure/wall pixel in this wedge
        struct_ys, _ = np.where(np.isin(np.where(in_region, label_img, 0), relevant_labels))
        if struct_ys.size == 0:
            continue
        y_top = struct_ys.min()

        # Sky pixels of this wedge lying strictly below that row
        sky_ys, _ = np.where(np.logical_and(is_sky, in_region))
        sky_below_total += np.sum(sky_ys > y_top)

    return (sky_below_total / total_sky) >= 0.5

c. Creating masks

In [None]:
def clean_mask(mask, min_area=50, kernel_size=2):
    """Remove speckle from a binary mask.

    Applies a morphological opening with a ``kernel_size`` x ``kernel_size``
    rectangular kernel, then keeps only the 8-connected components whose area
    is at least ``min_area`` pixels. Kept pixels are written as 255 into a
    zero array shaped like ``mask``.
    """
    structuring = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
    opened = cv2.morphologyEx(mask, cv2.MORPH_OPEN, structuring)

    count, label_map, stats, _ = cv2.connectedComponentsWithStats(opened, connectivity=8)

    # Row 0 of stats is the background; shift indices back to label numbers
    areas = stats[1:, cv2.CC_STAT_AREA]
    kept_labels = np.flatnonzero(areas >= min_area) + 1

    result = np.zeros_like(mask)
    result[np.isin(label_map, kept_labels)] = 255
    return result

In [None]:
def create_mask(semantic_img, class_labels, color_map, tolerance=40):
    """Build a cleaned 0/255 mask of the pixels belonging to the given classes.

    A pixel is selected when every channel differs from a class color by less
    than ``tolerance``. The union over all ``class_labels`` is passed through
    ``clean_mask`` before being returned. Raises ``KeyError`` when a label is
    missing from ``color_map``.
    """
    combined = np.zeros(semantic_img.shape[:2], dtype=np.uint8)
    for label in class_labels:
        reference = np.array(hex_to_rgb(color_map[label]))
        matches = (np.abs(semantic_img - reference) < tolerance).all(axis=2)
        combined[matches] = 255
    return clean_mask(combined)

d. Extracting objects

In [None]:
def extract_objects(mask, depth, min_area=600):
    """Split a mask into one 0/255 mask per 8-connected component.

    Components with fewer than ``min_area`` non-zero pixels are dropped.
    ``depth`` is accepted but not used by this implementation.

    Returns:
        List of ``uint8`` masks, in component-label order.
    """
    # Any non-zero pixel counts as foreground
    foreground = (mask > 0).astype(np.uint8)
    label_count, label_map = cv2.connectedComponents(foreground, connectivity=8)

    # Label 0 is the background, so start at 1
    candidates = (
        (label_map == label).astype(np.uint8) * 255
        for label in range(1, label_count)
    )
    return [component for component in candidates if cv2.countNonZero(component) >= min_area]


In [None]:
# Disabled cell: everything below is a single string literal, so
# extract_objects2 is never defined. The text holds a variant that would split
# a component in two using 2-cluster KMeans on its depth values.
'''
def extract_objects2(mask, depth, depth_cluster_thresh=5.0, min_split_area=2000):
    object_masks = []

    binary_mask = (mask > 0).astype(np.uint8)
    num_labels, labels = cv2.connectedComponents(binary_mask, connectivity=8)

    for i in range(1, num_labels):  # Skip background
        obj_mask = (labels == i).astype(np.uint8)
        masked_depth = depth[obj_mask > 0]

        if len(masked_depth) < 50:
            continue  # skip very small objects

        # Depth-based clustering
        kmeans = KMeans(n_clusters=2, n_init="auto")
        depths = masked_depth.reshape(-1, 1)
        kmeans.fit(depths)
        centers = kmeans.cluster_centers_.flatten()
        depth_diff = np.abs(centers[0] - centers[1])

        if depth_diff > depth_cluster_thresh:
            # Split into two masks
            full_indices = np.where(obj_mask > 0)
            cluster_labels = kmeans.labels_

            mask1 = np.zeros_like(obj_mask, dtype=np.uint8)
            mask2 = np.zeros_like(obj_mask, dtype=np.uint8)

            for j in range(len(cluster_labels)):
                y, x = full_indices[0][j], full_indices[1][j]
                if cluster_labels[j] == 0:
                    mask1[y, x] = 255
                else:
                    mask2[y, x] = 255

            if np.sum(mask1) > min_split_area:
                object_masks.append(mask1)
            if np.sum(mask2) > min_split_area:
                object_masks.append(mask2)
        else:
            object_masks.append(obj_mask * 255)

    return object_masks
'''

In [None]:
# Disabled cell: everything below is a single string literal, so
# extract_objects3 is never defined.
# NOTE(review): the text uses min_split_area, which is neither a parameter nor
# a local of extract_objects3; it would need defining before re-enabling.
'''
def extract_objects3(mask, depth, expected_count):
    binary_mask = (mask > 0).astype(np.uint8)
    num_labels, labels = cv2.connectedComponents(binary_mask, connectivity=8)

    depth_points = []
    pixel_coords = []

    for i in range(1, num_labels):  # Skip background
        obj_mask = (labels == i).astype(np.uint8)
        ys, xs = np.where(obj_mask)

        for y, x in zip(ys, xs):
            d = depth[y, x]
            if np.isfinite(d):
                depth_points.append([d])
                pixel_coords.append((y, x))

    if len(depth_points) < expected_count:
        return []  # not enough data to cluster

    kmeans = KMeans(n_clusters=expected_count, n_init="auto")
    kmeans.fit(depth_points)
    cluster_labels = kmeans.labels_

    masks = [np.zeros_like(mask, dtype=np.uint8) for _ in range(expected_count)]
    for (y, x), label in zip(pixel_coords, cluster_labels):
        masks[label][y, x] = 255

    final_objects = []
    for cluster_mask in masks:
        # Get connected components and keep only the largest
        num_labels, labels = cv2.connectedComponents(cluster_mask, connectivity=8)

        max_area = 0
        max_mask = None
        for i in range(1, num_labels):
            component_mask = (labels == i).astype(np.uint8) * 255
            area = np.sum(component_mask > 0)
            if area > max_area and area > min_split_area:
                max_area = area
                max_mask = component_mask

        if max_mask is not None:
            final_objects.append(max_mask)

    return final_objects
'''

e. Creating vanishing lines and dividing objects

In [None]:
def get_vanishing_lines(img, num_lines=100, extra_side_lines=30):
    """Build a fan of lines running from the image border to a vanishing point.

    Near-vertical segments (75-105 degrees) found by OpenCV's line segment
    detector are intersected in the least-squares sense (SVD null vector) to
    estimate a vanishing point. When fewer than two such segments exist, or
    the estimate is not finite, the point ``(w // 2, 0)`` is used instead.

    Args:
        img: BGR image array.
        num_lines: number of lines starting on the bottom edge, evenly spaced.
        extra_side_lines: number of start heights on each of the left and
            right edges.

    Returns:
        List of ``((x, y), (vx, vy))`` integer point pairs: first the bottom
        lines, then alternating left-edge and right-edge lines.
    """
    vanishing_lines = []
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    h, w = gray.shape

    # Line Segment Detection
    lsd = cv2.createLineSegmentDetector(0)
    lines, _, _, _ = lsd.detect(gray)
    if lines is None:
        # The detector reports "no segments" as None; treat that as an empty list
        lines = []

    # Filter near-vertical lines
    vertical_lines = []
    for line in lines:
        x1, y1, x2, y2 = line[0]
        angle = np.degrees(np.arctan2(y2 - y1, x2 - x1)) % 180
        if 75 <= angle <= 105:  # near-vertical
            vertical_lines.append([x1, y1, x2, y2])

    # Homogeneous line through each segment's end points
    lines_h = [
        np.cross(np.array([x1, y1, 1.0]), np.array([x2, y2, 1.0]))
        for x1, y1, x2, y2 in vertical_lines
    ]

    # Estimate vertical vanishing point
    vp = None
    if len(lines_h) >= 2:
        _, _, Vt = np.linalg.svd(np.stack(lines_h))
        vp_h = Vt[-1]
        # A zero homogeneous coordinate means the point lies at infinity
        # (perfectly parallel segments); dividing would give inf/nan and the
        # int() conversions below would fail.
        if vp_h[2] != 0:
            candidate = vp_h[:2] / vp_h[2]
            if np.all(np.isfinite(candidate)):
                vp = candidate
    if vp is None:
        vp = np.array([w // 2, 0])  # fallback

    vp_point = (int(vp[0]), int(vp[1]))

    # Get bottom-to-vanishing-point lines
    step = w // num_lines
    for i in range(num_lines):
        x = i * step + step // 2
        vanishing_lines.append(((int(x), h), vp_point))

    # Get side-to-vanishing-point lines from left and right edges
    y_steps = np.linspace(0, h, extra_side_lines, endpoint=False).astype(int)
    for y in y_steps:
        y = int(y)  # plain Python int, like the other coordinates
        vanishing_lines.append(((0, y), vp_point))        # from left edge
        vanishing_lines.append(((w - 1, y), vp_point))    # from right edge

    return vanishing_lines

In [None]:
def split_object_by_vanishing_lines(obj_mask, vanishing_lines, img_shape, top_y_jump_thresh=20):
    """Split one object mask where its top edge jumps between adjacent wedges.

    The image is partitioned into wedges, one between each pair of neighbouring
    vanishing lines (sorted by the x coordinate of their first point). For each
    wedge the topmost row occupied by the object is found. A split is proposed
    wherever that row changes by more than ``top_y_jump_thresh`` between
    adjacent wedges, and consecutive wedges between accepted splits are merged
    into one output mask.

    Args:
        obj_mask: 2-D integer mask of the object (non-zero = object).
        vanishing_lines: iterable of ``((x, y), (x, y))`` point pairs.
        img_shape: shape whose first two entries are (height, width).
        top_y_jump_thresh: minimum top-row difference that triggers a split.

    Returns:
        List of ``uint8`` masks, left to right. Object pixels outside every
        wedge are not included, and the list is empty when fewer than two
        lines are given.
    """
    H, W = img_shape[:2]
    masks = []

    # 1. Sort vanishing lines by x of the bottom point (pt1)
    lines_sorted = sorted(vanishing_lines, key=lambda line: line[0][0])

    # 2. Create polygonal region masks
    region_masks = []
    for left_line, right_line in zip(lines_sorted, lines_sorted[1:]):
        (x1a, y1a), (x1b, y1b) = left_line
        (x2a, y2a), (x2b, y2b) = right_line

        poly = np.array([[x1a, y1a], [x1b, y1b], [x2b, y2b], [x2a, y2a]], dtype=np.int32)
        region_mask = np.zeros((H, W), dtype=np.uint8)
        cv2.fillPoly(region_mask, [poly], 1)
        region_masks.append(region_mask)

    # 3. Compute top Y per region (None when the object is absent there)
    region_top_ys = []
    for region_mask in region_masks:
        ys, _ = np.where(obj_mask * region_mask)
        # The topmost occupied row; averaging the equal minima gives the same value
        region_top_ys.append(float(ys.min()) if len(ys) else None)

    # 4. Determine valid splits (large top y jump)
    tentative_splits = []
    for i in range(len(region_top_ys) - 1):
        current_top, next_top = region_top_ys[i], region_top_ys[i + 1]
        if current_top is None or next_top is None:
            continue
        if abs(current_top - next_top) > top_y_jump_thresh:
            tentative_splits.append(i + 1)

    # 5. Remove splits that would isolate a single column
    # NOTE(review): the test compares the two neighbouring splits with each
    # other, so adjacent splits such as [5, 6] both pass and wedge 5 still ends
    # up alone -- confirm whether that matches the intent.
    final_splits = []
    for i, curr in enumerate(tentative_splits):
        prev_split = tentative_splits[i - 1] if i > 0 else -1
        next_split = tentative_splits[i + 1] if i + 1 < len(tentative_splits) else len(region_masks)

        if next_split - prev_split > 2:
            final_splits.append(curr)

    # 6. Group regions by split boundaries
    prev = 0
    for idx in final_splits + [len(region_masks)]:
        combined_mask = np.zeros((H, W), dtype=np.uint8)
        for i in range(prev, idx):
            combined_mask |= (region_masks[i] & obj_mask)
        if np.any(combined_mask):
            masks.append(combined_mask)
        prev = idx

    return masks

In [None]:
def object_visualization(mask, objects, alpha=0.6):
    """Outline every object on top of the mask and return an RGB preview.

    Each object's external contour is drawn with 1-pixel thickness in a random
    color (channels in 100-255) onto a copy of the mask image, which is then
    blended with the plain mask image using weights 1 and ``alpha``.

    NOTE(review): a 2-D ``mask`` is multiplied by 255 before the uint8 cast,
    which suits 0/1 masks -- confirm callers do not pass 0/255 masks here.

    Returns:
        ``(image, count)``: the blended image after a BGR->RGB conversion and
        the number of objects.
    """
    is_single_channel = mask.ndim == 2
    base = (
        cv2.cvtColor((mask * 255).astype(np.uint8), cv2.COLOR_GRAY2BGR)
        if is_single_channel
        else mask.copy()
    )
    canvas = base.copy()

    for obj_mask in objects:
        binary = (obj_mask > 0).astype(np.uint8)
        outlines, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        stroke = [random.randint(100, 255) for _ in range(3)]
        cv2.drawContours(canvas, outlines, -1, stroke, thickness=1)

    blended = cv2.addWeighted(canvas, 1, base, alpha, 0)
    return cv2.cvtColor(blended, cv2.COLOR_BGR2RGB), len(objects)

f. Displaying

In [None]:
def display_images(tuples, grid_shape=(3, 4), figsize=(16, 6)):
    """Show several images in one figure, each at an explicit grid cell.

    Args:
        tuples: iterable of `(title, image, cmap, row, col)` entries; `row`
            and `col` pick the cell inside the grid.
        grid_shape: `(rows, cols)` of the grid.
        figsize: figure size passed to `plt.figure`.
    """
    n_rows, n_cols = grid_shape
    plt.figure(figsize=figsize)

    for entry in tuples:
        caption, picture, colormap, cell_row, cell_col = entry
        axis = plt.subplot2grid((n_rows, n_cols), (cell_row, cell_col))
        axis.imshow(picture, cmap=colormap)
        axis.set_title(caption)
        axis.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
def display_vanishing_lines(img, vanishing_lines, color=(0, 0, 255), thickness=1):
    """Return a copy of `img` with every vanishing line drawn on it.

    Args:
        img: image to draw on; it is copied, the input is left untouched.
        vanishing_lines: iterable of `(pt1, pt2)` endpoint pairs.
        color: line color passed to `cv2.line`.
        thickness: line thickness passed to `cv2.line`.
    """
    canvas = img.copy()
    for segment in vanishing_lines:
        start_pt, end_pt = segment
        cv2.line(canvas, start_pt, end_pt, color, thickness)
    return canvas

In [None]:
def display_objects(objects, title_prefix="Object", cols=5, cmap='gray'):
    """Show each object mask in its own subplot, `cols` masks per row.

    Args:
        objects: sequence of 2-D masks to display.
        title_prefix: subplot titles are "<title_prefix> <1-based index>".
        cols: number of subplots per row.
        cmap: colormap passed to `plt.imshow`.

    Nothing is drawn when `objects` is empty.
    """
    # With no objects the row count would be 0 and the figure would get a
    # height of 0 inches; there is nothing to show, so skip the figure entirely.
    if len(objects) == 0:
        return

    rows = (len(objects) + cols - 1) // cols  # ceiling division
    plt.figure(figsize=(3 * cols, 3 * rows))

    for i, obj_mask in enumerate(objects):
        plt.subplot(rows, cols, i + 1)
        plt.imshow(obj_mask, cmap=cmap)
        plt.title(f"{title_prefix} {i+1}")
        plt.axis('off')

    plt.tight_layout()
    plt.show()

g. JSON File

In [None]:
def record_objects_to_json(json_path, all_objects_info):
    """Write `all_objects_info` to `json_path` as JSON indented by two spaces.

    An existing file at `json_path` is overwritten.
    """
    with open(json_path, mode='w') as json_file:
        json.dump(all_objects_info, json_file, indent=2)

In [None]:
def touches_image_edge(mask):
    """List the image borders on which a 2-D mask has non-zero pixels.

    Returns:
        A list containing any of "top", "bottom", "left", "right", always in
        that order, e.g. ['top', 'left']. Empty when no border is touched.
    """
    # Unpacking two values keeps the requirement that the mask is 2-D
    _n_rows, _n_cols = mask.shape

    border_pixels = (
        ("top", mask[0, :]),
        ("bottom", mask[-1, :]),
        ("left", mask[:, 0]),
        ("right", mask[:, -1]),
    )
    return [side for side, pixels in border_pixels if np.any(pixels)]

In [None]:
def get_object_metadata(
    coord_angle,
    obj,
    object_type,
    avg_depth,
    pixel_height,
):
    """Build the JSON-ready record describing one detected object.

    Args:
        coord_angle: identifier stored as "fileName".
        obj: 2-D object mask; non-zero pixels belong to the object.
        object_type: label stored as "objectType".
        avg_depth: representative depth-map value of the object.
        pixel_height: object height in pixels.

    Returns:
        dict with the keys "fileName", "objectType", "distance", "height",
        "width", "horizontalOffset" and "isCut".

        "distance" and "height" fall back to 0 when `avg_depth` is not
        positive (previously a zero depth raised ZeroDivisionError, or gave
        an infinite distance with NumPy scalars, before the height guard was
        ever reached). "width" is 0 when the mask is empty or `pixel_height`
        is not positive.
    """
    # A non-positive depth cannot be converted to a range; one shared guard
    # keeps distance and height consistent with each other.
    has_valid_depth = avg_depth > 0

    # DISTANCE (in meters)
    # NOTE(review): 800, 4.00 and 1.27 look like calibration constants — confirm their origin.
    distance = 800 / avg_depth if has_valid_depth else 0

    # HEIGHT (in meters)
    height = 4.00 * ((pixel_height / avg_depth) ** 1.27) if has_valid_depth else 0

    # WIDTH (in meters): horizontal pixel extent scaled by the meters-per-pixel
    # ratio derived from the height
    _, xs = np.nonzero(obj)
    if xs.size > 0 and pixel_height > 0:
        pixel_width = xs.max() - xs.min()
        scale = height / pixel_height
        width = pixel_width * scale
    else:
        width = 0

    # HORIZONTAL OFFSET: -100 (left), 0 (center), +100 (right)
    if xs.size > 0:
        _, image_width = obj.shape
        object_center_x = (xs.max() + xs.min()) / 2
        image_center_x = image_width / 2
        horizontal_offset = 200 * (object_center_x - image_center_x) / image_width
    else:
        horizontal_offset = 0

    # ISCUT (top/bottom/left/right)
    is_cut = touches_image_edge(obj)

    return {
        "fileName": coord_angle,
        "objectType": object_type,
        "distance": distance,
        "height": height,
        "width": width,
        "horizontalOffset": horizontal_offset,
        "isCut": is_cut
    }


h. Getting average depth and pixel height

In [None]:
def get_depth(mask, semantic_rgb, depth_map, color_map, interested_classes):
    """Estimate one representative depth value for an object.

    A pixel contributes only if it lies inside `mask` (value > 0) and its
    `semantic_rgb` color exactly equals the color of one of
    `interested_classes`. Each contributing pixel is sampled exactly once,
    even when several class names share the same color.

    Args:
        mask: 2-D object mask (H x W).
        semantic_rgb: H x W x 3 semantic image, compared against class colors
            along axis 2.
        depth_map: depth array indexed with the H x W selection mask.
        color_map: dict mapping class name -> hex color (converted with
            `hex_to_rgb`).
        interested_classes: class names whose pixels may contribute.

    Returns:
        (depth, used_mask). `used_mask` is a uint8 H x W array marking the
        contributing pixels. `depth` is:
          * None when no finite, positive depth value was found;
          * the mean when fewer than 5 values remain or they are nearly
            constant (std < 1e-3);
          * otherwise the mode of a Gaussian KDE fitted to the values between
            the 10th and 90th percentile (mean of those values if the KDE
            fails with LinAlgError).
    """
    region_mask = mask > 0

    # Synonymous class names map to one hex color in the color map. Matching
    # each distinct color once, and sampling depth from the union mask, keeps
    # such pixels from being counted once per name (which would skew the
    # sample count, the percentiles and the KDE bandwidth).
    distinct_hex = list(dict.fromkeys(color_map[c] for c in interested_classes))

    selected = np.zeros(mask.shape, dtype=bool)
    for color_hex in distinct_hex:
        color = np.array(hex_to_rgb(color_hex))
        # Exact color match in the semantic segmentation, limited to the object
        selected |= np.all(semantic_rgb == color, axis=2) & region_mask

    # Combined mask returned to the caller (e.g. for visualization)
    accumulated_mask = selected.astype(np.uint8)

    # Collect depth values once per selected pixel; drop invalid readings
    depth_values = np.asarray(depth_map[selected], dtype=np.float64)
    depth_values = depth_values[np.isfinite(depth_values) & (depth_values > 0)]

    # If none, return none
    if depth_values.size == 0:
        return None, accumulated_mask

    # If less than 5 or everything is almost the same, return mean
    if depth_values.size < 5 or np.std(depth_values) < 1e-3:
        return float(np.mean(depth_values)), accumulated_mask

    # Enough samples: trim the tails before estimating the mode
    lower, upper = np.percentile(depth_values, [10, 90])
    filtered = depth_values[(depth_values >= lower) & (depth_values <= upper)]
    if filtered.size >= 1:
        depth_values = filtered

    try:
        kde = gaussian_kde(depth_values)
        xs = np.linspace(depth_values.min(), depth_values.max(), 500)
        density = kde(xs)
        mode_depth = xs[np.argmax(density)]
        return float(mode_depth), accumulated_mask
    except np.linalg.LinAlgError:
        # Degenerate (e.g. constant) data after trimming: fall back to the mean
        return float(np.mean(depth_values)), accumulated_mask

In [None]:
def get_pixel_height(mask, vanishing_lines):
    """Estimate an object's height in pixels, measured strip by strip.

    Each pair of consecutive vanishing lines bounds a polygonal strip. Within
    every strip that contains object pixels, the height sample is the
    Euclidean distance between the first topmost and the first bottommost
    object pixel of that strip.

    Args:
        mask: 2-D object mask; pixels > 0 belong to the object.
        vanishing_lines: sequence of `(pt1, pt2)` endpoint pairs.

    Returns:
        * None when no strip contains object pixels;
        * the largest sample when every populated strip touches an image
          border;
        * the mean of the border-free samples when there are fewer than 5;
        * otherwise the mean of the border-free samples lying between their
          10th and 90th percentile.
    """
    # Binarize explicitly. The strips below are filled with 1 and combined
    # with a bitwise AND, so a raw mask holding even values (e.g. 2 or 254)
    # would silently lose those pixels; the other mask helpers in this
    # notebook treat "> 0" as "belongs to the object".
    mask = (mask > 0).astype(np.uint8)

    distances = []       # samples from strips that stay clear of the image border
    edge_distances = []  # samples from strips whose object pixels touch the border

    for i in range(len(vanishing_lines) - 1):
        pt1a, pt2a = vanishing_lines[i]
        pt1b, pt2b = vanishing_lines[i + 1]

        # Polygon between two neighbouring rays (vanishing lines)
        polygon = np.array([pt1a, pt2a, pt2b, pt1b], dtype=np.int32)

        # Binary mask of that polygon
        region_mask = np.zeros_like(mask)
        cv2.fillPoly(region_mask, [polygon], 1)

        # Object pixels that fall inside the strip
        intersect = mask & region_mask
        ys, xs = np.nonzero(intersect)
        if len(xs) == 0:
            continue

        points = np.stack([xs, ys], axis=1)
        top = points[np.argmin(points[:, 1])]
        bottom = points[np.argmax(points[:, 1])]
        dist = np.linalg.norm(top - bottom)

        # Samples from strips cut off by the image border are kept apart:
        # they are only used when no border-free sample exists.
        touches_border = (
            np.any(intersect[0, :]) or     # top edge
            np.any(intersect[-1, :]) or    # bottom edge
            np.any(intersect[:, 0]) or     # left edge
            np.any(intersect[:, -1])       # right edge
        )
        if touches_border:
            edge_distances.append(dist)
        else:
            distances.append(dist)

    distances = np.array(distances)
    edge_distances = np.array(edge_distances)

    # If all columns touch the edge, return highest from edge distances
    # If no columns at all, return None
    if distances.size == 0:
        return edge_distances.max() if edge_distances.size > 0 else None

    # If less than 5 columns not touching the edge, return mean
    if distances.size < 5:
        return np.mean(distances)

    # Enough samples: average only those between the 10th and 90th percentile
    lower, upper = np.percentile(distances, [10, 90])
    filtered = distances[(distances >= lower) & (distances <= upper)]
    return np.mean(filtered) if filtered.size > 0 else None

In [None]:
def get_top_bottom_y(obj):
    """Return the smallest and largest row index holding a pixel > 0.

    Returns:
        `(top_y, bottom_y)`, or `(None, None)` when `obj` has no pixel > 0.
    """
    rows, _cols = np.where(obj > 0)
    if rows.size == 0:
        return None, None
    return rows.min(), rows.max()

### ✅ 2. Store Class-Color Mapping 

In [None]:
# Build the class-name -> hex-color lookup from the tab-separated mapping file.
# Each non-empty line is "<hex color>\t<name>[;<name>...]"; every name on a
# line is mapped to that line's color.
color_map = {}

with open(color_map_path, "r") as f:
    for raw_line in f:
        entry = raw_line.strip()
        if not entry:
            continue  # skip empty lines
        color_hex, labels = entry.split("\t")
        color_map.update({name.strip(): color_hex.strip() for name in labels.split(";")})

# Number of unique classes
num_classes = len(color_map)

# Number of unique hex colors
unique_colors = set(color_map.values())
num_colors = len(unique_colors)

print(f"Number of unique classes: {num_classes}")
print(f"Number of unique hex colors: {num_colors}")

display(color_map)

In [None]:
# Class groups: lists of class names that are looked up in color_map
structure_classes = [
    'building',
    'edifice',
    'house',
    'skyscraper',
    'tower',
]
wall_classes = ['wall']
overhead_classes = [
    'awning',
    'canopy',
    'bridge',
]
vegetation_classes = [
    'tree',
    'palm tree',
]
base_classes = [
    'sidewalk', 'pavement',
    'road', 'route',
    'floor', 'flooring',
    'ground', 'earth',
    'field', 'runway',
    'land', 'soil',
    'sand', 'dirt',
    'track', 'path',
    'rock', 'stone',
    'sky',
]

### ✅ 3. Create vanishing lines

In [None]:
# Compute the vanishing lines once from a fixed reference image; the same
# lines are reused for every image processed later in the notebook.
ref_img_path = os.path.join(input_vanishing_path, '14.558112,121.0256685_45.jpg')
ref_img = cv2.imread(ref_img_path)

# cv2.imread does not raise on a missing or unreadable file, it returns None.
# Fail here with the offending path instead of an obscure error further down.
if ref_img is None:
    raise FileNotFoundError(f"Could not read vanishing-line reference image: {ref_img_path}")

vanishing_lines = get_vanishing_lines(ref_img)

ref_vanishing_img = display_vanishing_lines(ref_img, vanishing_lines)
ref_vanishing_img_rgb = cv2.cvtColor(ref_vanishing_img, cv2.COLOR_BGR2RGB)

# Display with matplotlib
plt.figure(figsize=(10, 6))
plt.imshow(ref_vanishing_img_rgb)
plt.title("Vanishing Lines")
plt.axis('off')
plt.show()

### ⛔ 4. Create masks for each class, separate them as objects, record their data in JSON

a. Creating JSON files for each coordinate angle

In [None]:
# Per-image pipeline: for every .jpg in the grouped input folders that also has
# a semantic and a depth output, split the structure / wall / vegetation masks
# into individual objects, measure them, and write the records to
# "<coord_angle>.json" inside output_json_path.
#
# `all_objects_info` buffers the records of the image currently being
# processed; it is emptied right after that image's JSON file is written.
all_objects_info = []

# Traverse coordinate folders
for folder_name in tqdm(sorted(os.listdir(input_grouped_path)), desc="Processing folders"):
    folder_input = os.path.join(input_grouped_path, folder_name)
    folder_depth = os.path.join(output_depth_path, folder_name)
    folder_semantic = os.path.join(output_semantic_path, folder_name)

    # Ignore stray files sitting next to the coordinate folders
    if not os.path.isdir(folder_input):
        continue

    for filename in os.listdir(folder_input):
        if not filename.endswith(".jpg"):
            continue
        
        # Parse base filename
        base_name = filename.replace(".jpg", "")
        coord_angle = base_name  # example: "14.55208,121.0210221_0"
        folder_output_json = os.path.join(output_json_path, f"{coord_angle}.json")
        # Resume support: an image whose JSON already exists is not processed again.
        # Images skipped further down (missing inputs, ceiling detected) never get a
        # JSON file, so they are re-examined on every run of this cell.
        if os.path.exists(folder_output_json):
            continue

        # Build full paths
        img_path = os.path.join(folder_input, filename)
        sem_path = os.path.join(folder_semantic, f"semantic_{coord_angle}.jpg")
        depth_path = os.path.join(folder_depth, f"depth_{coord_angle}.jpg")

        # Check if all files exist
        if not (os.path.exists(img_path) and os.path.exists(sem_path) and os.path.exists(depth_path)):
            continue

        # print(coord_angle)
        
        # Load semantic image
        semantic_bgr = cv2.imread(sem_path)
        semantic_rgb = cv2.cvtColor(semantic_bgr, cv2.COLOR_BGR2RGB)

        # Inpaint semantic image (skip if there's a ceiling)
        # NOTE(review): clean_semantic, has_ceiling, has_ceiling_2 and inpaint_semantic
        # are defined elsewhere in the notebook; their exact criteria are not visible here.
        cleaned_semantic = clean_semantic(semantic_rgb, color_map)
        # IF black ceiling
        if (has_ceiling(cleaned_semantic)):
            continue
        # IF structure/wall ceiling
        if (has_ceiling_2(cleaned_semantic, color_map, vanishing_lines)):
            continue
        inpainted_semantic = inpaint_semantic(cleaned_semantic, vanishing_lines, color_map)

        # Load original image, depth image, vanishing lines image
        # (depth is read as a single-channel image; vanishing_lines_img is only
        # consumed by the disabled display grid further down)
        img = cv2.imread(img_path)
        depth = cv2.imread(depth_path, cv2.IMREAD_GRAYSCALE)
        vanishing_lines_img = display_vanishing_lines(img, vanishing_lines)

        # For buildings
        # Second pass: the listed classes are passed through classes_to_black
        # (presumably painted black — confirm) and the result is inpainted again.
        # Only the structure mask is built from this second version.
        classes_to_be_removed = ['wall', 'awning', 'canopy', 'bridge', 'tree', 'palm tree', 'plant', 'grass']
        inpainted_semantic_2 = inpaint_semantic(classes_to_black(inpainted_semantic, color_map, classes_to_be_removed), vanishing_lines, color_map)

        # Create masks
        structure_mask = create_mask(inpainted_semantic_2, structure_classes, color_map)
        wall_mask = create_mask(inpainted_semantic, wall_classes, color_map)
        vegetation_mask = create_mask(inpainted_semantic, vegetation_classes, color_map)

        # Extract objects (those that aren't connected)
        structure_objects = extract_objects(structure_mask, depth)
        wall_objects = extract_objects(wall_mask, depth)
        vegetation_objects = extract_objects(vegetation_mask, depth)

        # Divide structure objects (vertical column method)
        new_structure_objects = []
        for obj in structure_objects:
            split_masks = split_object_by_vanishing_lines(obj, vanishing_lines, img.shape)
            new_structure_objects.extend(split_masks)

        # Record metadata for all objects
        # (the valid_* lists keep only the objects that produced a record)
        valid_structure_objects = []
        valid_vegetation_objects = []
        valid_wall_objects = []
        
        # STRUCTURE
        # (`i` is not used inside this loop)
        for i, obj in enumerate(new_structure_objects, 1):

            # Get the depth
            # Depth is sampled against cleaned_semantic, not the inpainted image
            avg_depth_structure, object_mask = get_depth(obj, cleaned_semantic, depth, color_map, structure_classes)
            if avg_depth_structure is None:
                continue  # skip to the next object

            # Get the height
            pixel_height_structure = get_pixel_height(obj, vanishing_lines)
            if pixel_height_structure is None:
                continue  # skip to the next object
            
            # Record
            valid_structure_objects.append(obj) 
            meta = get_object_metadata(coord_angle, obj, "structure", avg_depth_structure, pixel_height_structure)
            # get_object_metadata always returns a non-empty dict, so this guard always passes
            if meta:
                all_objects_info.append(meta)
        
        new_structure_objects = valid_structure_objects

        # WALL
        # Same steps as for structures, using the wall classes
        for obj in wall_objects:
            
            # Get the depth
            avg_depth_wall, object_mask = get_depth(obj, cleaned_semantic, depth, color_map, wall_classes)
            if avg_depth_wall is None:
                continue  # skip to the next object

            # Get the height
            pixel_height_wall = get_pixel_height(obj, vanishing_lines)
            if pixel_height_wall is None:
                continue  # skip to the next object
            
            # Record
            valid_wall_objects.append(obj) 
            meta = get_object_metadata(coord_angle, obj, "wall", avg_depth_wall, pixel_height_wall)
            if meta:
                all_objects_info.append(meta)

        new_wall_objects = valid_wall_objects

        # VEGETATION
        # Height is not taken from get_pixel_height here: it is the span from the
        # object's top row down to an estimated trunk row.
        for i, obj in enumerate(vegetation_objects, 1):

            # Get the depth
            avg_depth_vegetation, object_mask = get_depth(obj, cleaned_semantic, depth, color_map, vegetation_classes)
            if avg_depth_vegetation is None:
                continue  # skip to the next object

            # Get the top and bottom y
            # (bottom_y_vegetation is not used afterwards)
            top_y_vegetation, bottom_y_vegetation = get_top_bottom_y(obj)
            # Estimated trunk row: increases with the depth value, capped at row 300.
            # NOTE(review): 0.6, 196 and 300 look like calibration constants tied to a
            # fixed image size — confirm.
            trunk_y = min(300, (0.6 * avg_depth_vegetation + 196))
            pixel_height_vegetation = trunk_y - top_y_vegetation
            # Object top lies at or below the estimated trunk row: no usable height
            if (pixel_height_vegetation <= 0):
                continue  # skip to the next object

            # Record
            valid_vegetation_objects.append(obj) 
            meta = get_object_metadata(coord_angle, obj, "vegetation", avg_depth_vegetation, pixel_height_vegetation)
            if meta:
                all_objects_info.append(meta)

            # Disabled debug output, kept as a bare string literal (evaluated and discarded)
            '''
            # Display for testing purposes
            plt.figure(figsize=(3, 3)) 
            plt.imshow(obj, cmap='gray')
            plt.axis('off')
            plt.show()
            
            print(f"Tree {i}: Depth =", avg_depth_vegetation)
            print(f"Tree {i}: Top y = ", top_y_vegetation)
            print(f"Tree {i}: Trunk y = ", trunk_y)
            '''
            
        new_vegetation_objects = valid_vegetation_objects

        # Create object visualizations
        # (results are only consumed by the disabled display grid below)
        structure_visualization, num_structures = object_visualization(structure_mask, new_structure_objects)
        wall_visualization, num_walls = object_visualization(wall_mask, new_wall_objects)
        vegetation_visualization, num_vegetations = object_visualization(vegetation_mask, new_vegetation_objects)

        # Disabled overview grid, kept as a bare string literal (evaluated and discarded)
        '''
        # Display the triplets, the masks, and the objects
        image_grid = [
            ("Original", cv2.cvtColor(img, cv2.COLOR_BGR2RGB), None, 0, 0),
            ("Vanishing Lines", cv2.cvtColor(vanishing_lines_img, cv2.COLOR_BGR2RGB), None, 0, 1),
            ("Depth Map", depth, "magma", 0, 2),

            ("Segmentation Mask", semantic_rgb, None, 1, 0),
            ("Cleaned", cleaned_semantic, None, 1, 1),
            ("Inpainted", inpainted_semantic, None, 1, 2),
            ("Inpainted2", inpainted_semantic_2, None, 1, 3),
        
            ("Structure Mask", cv2.cvtColor(structure_mask, cv2.COLOR_BGR2RGB), None, 2, 0),
            ("Wall Mask", cv2.cvtColor(wall_mask, cv2.COLOR_BGR2RGB), None, 2, 1),
            ("Vegetation Mask", cv2.cvtColor(vegetation_mask, cv2.COLOR_BGR2RGB), None, 2, 2),
            
            (f"Structures ({num_structures})", cv2.cvtColor(structure_visualization, cv2.COLOR_BGR2RGB), None, 3, 0),
            (f"Walls ({num_walls})", cv2.cvtColor(wall_visualization, cv2.COLOR_BGR2RGB), None, 3, 1),
            (f"Vegetation ({num_vegetations})", cv2.cvtColor(vegetation_visualization, cv2.COLOR_BGR2RGB), None, 3, 2),
        ]
        display_images(image_grid, grid_shape=(4, 4), figsize=(16, 10))
        '''

        #display_objects(new_structure_objects, title_prefix="Structure")
        #display_objects(new_wall_objects, title_prefix="Wall")
        #display_objects(new_vegetation_objects, title_prefix="Vegetation")

        # Ensure output directory exists
        os.makedirs(output_json_path, exist_ok=True)
        
        # Save JSON file (written even when no object was recorded for this image)
        record_objects_to_json(folder_output_json, all_objects_info)
        # print(f"{coord_angle}.json saved")
        
        # Reset for the next image
        all_objects_info = []

b. Combining the per-image JSON files into one

In [None]:
# Merge every per-image JSON file in output_json_path into a single list and
# write it to "output.json" inside output_final_json_path.
input_folder = output_json_path
output_file = os.path.join(output_final_json_path, "output.json")

combined_data = []

# os.listdir returns entries in arbitrary order; sorting makes the order of
# the records in output.json reproducible from run to run.
json_files = sorted(f for f in os.listdir(input_folder) if f.endswith(".json"))

for filename in tqdm(json_files, desc="Combining JSON files"):
    file_path = os.path.join(input_folder, filename)
    with open(file_path, 'r', encoding='utf-8') as f:
        try:
            data = json.load(f)
            # A file holding a list contributes its items; anything else is
            # appended as a single record.
            if isinstance(data, list):
                combined_data.extend(data)
            else:
                combined_data.append(data)
        except json.JSONDecodeError:
            # Best effort: report the broken file and keep combining the rest
            print(f"Skipping invalid JSON file: {filename}")

# Save combined output
os.makedirs(output_final_json_path, exist_ok=True)
with open(output_file, 'w', encoding='utf-8') as f_out:
    json.dump(combined_data, f_out, indent=4)

print(f"\nSaved combined JSON to: {output_file}")