In [2]:
import json
import os
import numpy as np
from PIL import Image, ImageDraw
from tqdm import tqdm
from pathlib import Path
import shutil

In [20]:
def load_shapes_from_json(json_path):
    """Read an annotation JSON file and return its parsed contents.

    Despite its name, this function returns the whole decoded document, not
    a list of shapes; the calling cell binds the result to ``data``.

    An earlier version also walked ``data["features"]["xy"]`` to build a
    ``shapes`` list, but that list was never returned or used, and the loop
    relied on ``wkt`` and ``DAMAGE_MAPPING``, which are not defined in the
    visible part of this notebook. That dead code has been removed; the return
    value is unchanged.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(json_path, "r", encoding="utf-8") as f:
        return json.load(f)

In [21]:
# Annotation file for the training split.
json_path = "../data/data_secondary_unet/split/train/_annotations.coco.json"

# load_shapes_from_json returns the full parsed JSON document, so `data` holds
# the raw decoded structure rather than a list of shapes.
data = load_shapes_from_json(json_path)

In [5]:
#data['annotations']

### Using the following categories:
- **0**: No Building
- **1**: Building - No Damage
- **2**: Building - Damage

In [7]:
splits = ['train', 'test', 'val']


def draw_mask(anns, size):
    """Rasterise polygon annotations into a single-channel class mask.

    Args:
        anns: Annotation dicts for one image. Each must provide
            'category_id' and 'segmentation'; polygon segmentations are
            lists of flat ``[x0, y0, x1, y1, ...]`` coordinate lists.
        size: ``(width, height)`` of the mask to create.

    Returns:
        A ``uint8`` array in which pixels are 0 where nothing was drawn and
        ``category_id`` inside (and on the outline of) each polygon.
        Annotations are drawn in order, so later ones overwrite earlier ones
        where they overlap.
    """
    mask = Image.new("L", size, 0)
    draw = ImageDraw.Draw(mask)

    for ann in anns:
        category_id = ann['category_id']
        segs = ann['segmentation']
        if not isinstance(segs, list):
            # A non-list segmentation (e.g. an RLE dict) cannot be drawn as a
            # polygon; iterating it would yield its keys and crash below.
            continue
        for seg in segs:
            # Pair up x/y values; zip ignores a dangling trailing coordinate
            # instead of raising IndexError.
            poly = list(zip(seg[0::2], seg[1::2]))
            if len(poly) < 3:
                # Fewer than three points does not describe an area.
                continue
            draw.polygon(poly, outline=category_id, fill=category_id)

    return np.array(mask, dtype=np.uint8)


# Build one PNG mask per image listed in each split's annotation file.
for split in splits:
    json_path = f"../data/data_secondary_unet/split/{split}/_annotations.coco.json"
    output_mask_dir = f"../data/data_secondary_unet/split/{split}/masks"

    with open(json_path, 'r', encoding='utf-8') as f:
        coco = json.load(f)

    images = {img['id']: img for img in coco['images']}
    annotations = coco['annotations']

    os.makedirs(output_mask_dir, exist_ok=True)

    # Group annotations by the image they belong to.
    image_to_annotations = {}
    for ann in annotations:
        image_to_annotations.setdefault(ann['image_id'], []).append(ann)

    for image_id, image_info in tqdm(images.items()):
        filename = image_info['file_name']
        width, height = image_info['width'], image_info['height']

        # Images without annotations still get an all-zero mask.
        anns = image_to_annotations.get(image_id, [])
        mask_array = draw_mask(anns, size=(width, height))

        # Swap only the final extension for '_mask.png'. A plain
        # str.replace('.jpg', ...) leaves non-.jpg names untouched, which
        # would save the mask under the image's own name and extension.
        mask_name = os.path.splitext(filename)[0] + '_mask.png'
        out_path = os.path.join(output_mask_dir, mask_name)
        Image.fromarray(mask_array).save(out_path)

100%|██████████| 70/70 [00:00<00:00, 197.29it/s]
100%|██████████| 37/37 [00:00<00:00, 230.25it/s]
100%|██████████| 37/37 [00:00<00:00, 234.32it/s]


In [15]:
import os
from pathlib import Path
import shutil

splits = ['train', 'test', 'val']

# Every split reads from the same folder of raw pre-disaster tiles.
pre_image_dir = Path("../data/data_secondary_unet/raw/pre")
mask_suffix = "_mask.png"

# For each mask in a split, copy the matching '<tile>_pre.png' into that
# split's 'pre_images' folder.
for split in splits:
    # Paths
    mask_dir = Path(f"../data/data_secondary_unet/split/{split}/masks")
    output_dir = Path(f"../data/data_secondary_unet/split/{split}/pre_images")
    output_dir.mkdir(parents=True, exist_ok=True)

    # Sorted so that the processing (and message) order is reproducible.
    for mask_file in sorted(os.listdir(mask_dir)):
        if not mask_file.endswith(mask_suffix):
            continue

        # Drop only the trailing '_mask.png', then keep what precedes
        # '_png.rf' (if present), e.g. 'gaza_city_jabalya_1_1'.
        base_name = mask_file[:-len(mask_suffix)]
        tile_id = base_name.split("_png.rf")[0]
        pre_filename = f"{tile_id}_pre.png"
        pre_path = pre_image_dir / pre_filename

        # Only filesystem calls can fail here, so catch OSError rather than
        # every exception; one bad file must not stop the remaining copies.
        try:
            if pre_path.exists():
                shutil.copy(pre_path, output_dir / pre_filename)
            else:
                print(f"Missing pre-disaster image for: {tile_id}")
        except OSError as e:
            print(f"Error processing {mask_file}: {e}")


In [16]:
import os
import shutil
from pathlib import Path

    # Define paths
# Imported once here rather than on every loop iteration.
from PIL import Image

splits = ['train', 'test', 'val']
pre_suffix = "_pre"

# For every pre-disaster PNG in a split, find a post-disaster JPG for the same
# tile and write both into '<split>/images' under a common naming scheme.
for split in splits:
    base_dir = Path(f"../data/data_secondary_unet/split/{split}")
    pre_dir = base_dir / "pre_images"
    post_dir = base_dir / "post_images"
    output_dir = base_dir / "images"

    # Create output directory if it doesn't exist
    output_dir.mkdir(exist_ok=True)

    # Sorted so that processing order is reproducible across runs.
    for pre_path in sorted(pre_dir.glob("*.png")):
        # Extract the base key, e.g. 'beit_hanoun_0_0'. Only a trailing
        # '_pre' is removed, so a '_pre' elsewhere in the name is left alone.
        stem = pre_path.stem
        base_key = stem[:-len(pre_suffix)] if stem.endswith(pre_suffix) else stem

        # Define target filenames
        pre_target = output_dir / f"{base_key}_pre_disaster.png"
        post_target = output_dir / f"{base_key}_post_disaster.png"

        # Candidate post-disaster images are named '<base_key>_*.jpg'. Sorting
        # makes the choice deterministic when several files match.
        matching_post = sorted(post_dir.glob(f"{base_key}_*.jpg"))

        if not matching_post:
            print(f"No matching post image for: {base_key}")
            continue

        # Convert the post image to PNG; the context manager closes the
        # source file promptly.
        with Image.open(matching_post[0]) as post_img:
            post_img.convert("RGB").save(post_target)

        # Copy pre image as-is (already .png)
        shutil.copy(pre_path, pre_target)

        print(f"Processed: {base_key}")


Processed: gaza_city_jabalya_4_3
Processed: gaza_city_jabalya_3_1
Processed: beit_hanoun_5_1
Processed: beit_hanoun_0_4
Processed: gaza_city_jabalya_5_0
Processed: beit_hanoun_3_0
Processed: gaza_city_jabalya_2_2
Processed: beit_hanoun_0_5
Processed: beit_hanoun_3_1
Processed: gaza_city_jabalya_2_3
Processed: beit_hanoun_4_3
Processed: beit_hanoun_2_2
Processed: gaza_city_jabalya_0_4
Processed: beit_hanoun_5_0
Processed: beit_hanoun_3_3
Processed: gaza_city_shejaiya_5_4
Processed: gaza_city_jabalya_2_1
Processed: khan_yunis_3_0
Processed: beit_hanoun_1_4
Processed: gaza_city_shejaiya_0_1
Processed: beit_hanoun_5_2
Processed: gaza_city_jabalya_3_2
Processed: beit_hanoun_2_1
Processed: beit_hanoun_1_5
Processed: gaza_city_shejaiya_4_6
Processed: gaza_city_shejaiya_0_0
Processed: beit_hanoun_5_3
Processed: beit_hanoun_3_2
Processed: beit_hanoun_4_0
Processed: gaza_city_jabalya_2_0
Processed: gaza_city_jabalya_1_4
Processed: gaza_city_shejaiya_1_6
Processed: beit_hanoun_4_5
Processed: gaza

In [18]:
import os
import re

splits = ['train', 'test', 'val']

# Matches '<tile>_png.rf.<hex hash>_mask.png'. Group 1 is the tile name and
# group 2 the '_mask.png' suffix; the '_png.rf.<hash>' part between them is
# dropped. Compiled once because it does not depend on the split.
pattern = re.compile(r'(.*)_png\.rf\.[a-f0-9]+(_mask\.png)')

for split in splits:
    # Folder containing your mask files
    folder = f'../data/data_secondary_unet/split/{split}/masks'

    # Sorted so that the outcome is reproducible if two files map to one name.
    for filename in sorted(os.listdir(folder)):
        # fullmatch requires the whole name to fit the pattern, so the name
        # necessarily ends with '_mask.png'.
        match = pattern.fullmatch(filename)
        if not match:
            continue

        new_name = match.group(1) + match.group(2)
        old_path = os.path.join(folder, filename)
        new_path = os.path.join(folder, new_name)

        # os.rename can silently replace an existing file on POSIX; never
        # overwrite a mask that already carries the target name.
        if os.path.exists(new_path):
            print(f'Skipping (target already exists):\n  {filename}\n  -> {new_name}')
            continue

        print(f'Renaming:\n  {filename}\n  -> {new_name}')
        os.rename(old_path, new_path)

Renaming:
  gaza_city_jabalya_1_0_png.rf.6cf2144567456736b31a7cd85d38ed64_mask.png
  -> gaza_city_jabalya_1_0_mask.png
Renaming:
  beit_hanoun_5_0_png.rf.1248d03f32112e8dd06ff4103e418e6a_mask.png
  -> beit_hanoun_5_0_mask.png
Renaming:
  beit_hanoun_3_4_png.rf.c39a6b03ef7b3eb584ecf8e4b41189eb_mask.png
  -> beit_hanoun_3_4_mask.png
Renaming:
  beit_hanoun_0_4_png.rf.ce268a5fa5cde69c29deb55638da380b_mask.png
  -> beit_hanoun_0_4_mask.png
Renaming:
  khan_yunis_1_1_png.rf.b53ea5e8b2a0d2ee37e883665fa1d0ba_mask.png
  -> khan_yunis_1_1_mask.png
Renaming:
  beit_hanoun_5_3_png.rf.1357b553568e3461af2fa6fad2d66aaa_mask.png
  -> beit_hanoun_5_3_mask.png
Renaming:
  gaza_city_jabalya_3_2_png.rf.a01cb095fc1bfc90deb20df50db4ca1f_mask.png
  -> gaza_city_jabalya_3_2_mask.png
Renaming:
  gaza_city_jabalya_0_0_png.rf.85aeb6800a1a1d0ad4339b22df9e8604_mask.png
  -> gaza_city_jabalya_0_0_mask.png
Renaming:
  gaza_city_jabalya_1_4_png.rf.c61914a56c1e685b9046a36f63c191ba_mask.png
  -> gaza_city_jabalya_1_4_