In [None]:
import json
import os

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets

In [None]:
def xbd_grabber(image_data_path, polygon_data_path):
    """Build image-level and building-level DataFrames from a folder of label JSON files.

    Every ``*.json`` file in ``polygon_data_path`` is read (in sorted file-name
    order, so the row order is reproducible) and contributes one row to
    ``data_images`` plus one row per entry of ``data['features']['xy']`` to
    ``data_buildings``.

    Parameters
    ----------
    image_data_path : str
        Directory that holds the image files; only used to build ``image_path``.
    polygon_data_path : str
        Directory that holds the label ``.json`` files.

    Returns
    -------
    (data_images, data_buildings) : tuple of pandas.DataFrame

    data_images columns:
        id:         ``metadata['id']`` of the label file (used elsewhere in this
                    notebook as the key that joins pre and post rows)
        image_id:   second ``_``-separated token of ``metadata['img_name']``
                    NOTE(review): this token comes from the file name only, so it
                    is presumably shared by the pre/post images of one scene and
                    may repeat across disasters -- confirm before using as a key
        location:   ``metadata['disaster']`` (disaster title)
        phase:      third ``_``-separated token of the image name ("pre" or "post")
        disaster:   ``metadata['disaster_type']``
        image_path: ``image_data_path`` joined with the image name, ".png" -> ".tif"

    data_buildings columns:
        polygon_id: ``properties['uid']`` of the feature
        image_id:   same value as in ``data_images``
        phase:      same value as in ``data_images``
        damage:     "pre" for pre-disaster images, otherwise ``properties['subtype']``
        polygon:    raw ``wkt`` field of the feature, stored unparsed

    Raises
    ------
    KeyError / IndexError
        If a label file lacks one of the fields above or its image name has
        fewer than three ``_``-separated tokens.
    """
    image_columns = ["id", "image_id", "location", "phase", "disaster", "image_path"]
    building_columns = ["polygon_id", "image_id", "phase", "damage", "polygon"]

    image_entries = []
    polygon_entries = []

    # os.listdir returns entries in arbitrary order; sort so repeated runs
    # yield identical frames (downstream crop numbering depends on row order).
    for json_file in sorted(os.listdir(polygon_data_path)):
        if not json_file.endswith(".json"):
            continue

        with open(os.path.join(polygon_data_path, json_file), 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Extract image-level metadata
        metadata = data['metadata']
        image_name = metadata['img_name']
        image_path = os.path.join(image_data_path, image_name.replace(".png", ".tif"))

        # Image names are split on "_"; tokens 1 and 2 carry the number and phase.
        parts = image_name.replace(".png", "").split("_")
        image_id = parts[1]
        phase = parts[2]

        image_entries.append({
            "id": metadata['id'],
            "image_id": image_id,
            "location": metadata['disaster'],
            "phase": phase,
            "disaster": metadata['disaster_type'],
            "image_path": image_path
        })

        for feature in data['features']['xy']:
            properties = feature["properties"]
            polygon_entries.append({
                "polygon_id": properties["uid"],
                "image_id": image_id,
                "phase": phase,
                # Pre-disaster annotations get the placeholder "pre"; the
                # subtype field is only read for post-disaster images.
                "damage": "pre" if phase == "pre" else properties["subtype"],
                "polygon": feature["wkt"]
            })

    # Passing explicit columns keeps the schema stable even when no label
    # files were found (otherwise the frames would have no columns at all).
    data_images = pd.DataFrame(image_entries, columns=image_columns)
    data_buildings = pd.DataFrame(polygon_entries, columns=building_columns)

    return data_images, data_buildings

In [None]:
def crop_and_save_images(paired_images, save_dir_pre, save_dir_post,
                         image_size=1024, tile_size=256):
    """Cut each pre/post image pair into square tiles and save them as PNG files.

    With the defaults every 1024 x 1024 image yields 16 tiles of 256 x 256.
    The pre tile and the post tile of the same region are written under the
    same file name, one into ``save_dir_pre`` and one into ``save_dir_post``.

    Parameters
    ----------
    paired_images : iterable of tuple
        Entries of the form ``(pre_path, post_path)`` or
        ``(pre_path, post_path, label)``.
    save_dir_pre, save_dir_post : str
        Output directories; created if they do not exist.
    image_size : int, default 1024
        Side length (in pixels) of the area that is tiled.
    tile_size : int, default 256
        Side length (in pixels) of each tile. If ``image_size`` is not a
        multiple of ``tile_size`` the last row/column of crop boxes extends
        past ``image_size``.

    File names are ``"{count}_label{label}.png"`` when the entry carries a
    label and ``"{count}.png"`` otherwise, where ``count`` is a running index
    over all tiles of all pairs.
    """
    os.makedirs(save_dir_pre, exist_ok=True)
    os.makedirs(save_dir_post, exist_ok=True)

    count = 0
    for entry in paired_images:
        # The label is optional so that plain (pre_path, post_path) pairs
        # no longer fail to unpack.
        pre_path, post_path, *extra = entry
        label_suffix = f"_label{extra[0]}" if extra else ""

        # convert() returns a new, fully loaded image, so the source files can
        # be closed as soon as the RGB copies exist.
        with Image.open(pre_path) as pre_src, Image.open(post_path) as post_src:
            pre_img = pre_src.convert('RGB')
            post_img = post_src.convert('RGB')

        for top in range(0, image_size, tile_size):
            for left in range(0, image_size, tile_size):
                box = (left, top, left + tile_size, top + tile_size)
                crop_name = f"{count}{label_suffix}.png"
                pre_img.crop(box).save(os.path.join(save_dir_pre, crop_name))
                post_img.crop(box).save(os.path.join(save_dir_post, crop_name))
                count += 1

In [None]:
# Parse the label folders of both tiers; each call returns an
# (images, buildings) pair of DataFrames.
data_images_tier1, data_buildings_tier1 = xbd_grabber(
    image_data_path="./geotiffs/tier1/images/",
    polygon_data_path="./geotiffs/tier1/labels/",
)
data_images_tier3, data_buildings_tier3 = xbd_grabber(
    image_data_path="./geotiffs/tier3/images/",
    polygon_data_path="./geotiffs/tier3/labels/",
)

# Stack the tiers into one frame per table, renumbering the index from 0.
data_images = pd.concat(
    (data_images_tier1, data_images_tier3),
    ignore_index=True,
)
data_buildings = pd.concat(
    (data_buildings_tier1, data_buildings_tier3),
    ignore_index=True,
)

In [None]:
# Build the list of (pre_path, post_path) tuples: split the image table by
# phase, then join the two halves on the shared `id` column.
df_pre = data_images.loc[data_images['phase'] == 'pre']
df_post = data_images.loc[data_images['phase'] == 'post']
merged = df_pre.merge(df_post, on='id', suffixes=('_pre', '_post'))

# One tuple per merged row, in row order.
paired_images = list(zip(merged['image_path_pre'], merged['image_path_post']))

# Tile every pair and write the crops to the pre/post output folders.
crop_and_save_images(
    paired_images,
    "./geotiffs/cropped_training_pre",
    "./geotiffs/cropped_training_post",
)