In [1]:
import tensorflow as tf
import numpy as np
import torch
import torch.nn as nn
import os
from torch.utils.data import Dataset, DataLoader
from PIL import Image, ImageDraw
import torchvision.transforms as transforms
import tifffile
import json
from shapely import wkt
import pandas as pd
import torch.nn.functional as F

2025-04-15 20:36:44.277124: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Root folder that holds the `images/` and `labels/` sub-folders.
# Set the GEOTIFF_DATA_ROOT environment variable to run this notebook on another
# machine; when it is unset the original local path is used unchanged.
DATA_ROOT = os.environ.get("GEOTIFF_DATA_ROOT", '/Users/ashleychen/Downloads/geotiffs/hold')

image_dir = os.path.join(DATA_ROOT, 'images')
label_dir = os.path.join(DATA_ROOT, 'labels')

In [7]:

def _parse_image_name(img_name):
    """Return ``(disaster_type, stage)`` parsed from an image filename.

    The extension is dropped and the remaining name is split on ``_``. When
    there are at least four pieces (for example
    ``guatemala-volcano_00000004_post_disaster``), the first piece is split on
    ``-``: a leading ``hurricane`` token yields ``"hurricane"``, otherwise the
    last token is used. The stage is the third ``_``-separated piece. Names
    with fewer than four pieces yield two empty strings.
    """
    # splitext strips only the real extension, unlike str.replace(".tif", "").
    base_name = os.path.splitext(img_name)[0]
    parts = base_name.split("_")
    if len(parts) < 4:
        return "", ""

    event_tokens = parts[0].split("-")
    if event_tokens[0] == "hurricane":
        disaster_type = "hurricane"
    else:
        disaster_type = event_tokens[-1]
    return disaster_type, parts[2]


def _summarize_geometry(wkt_str):
    """Return ``(bbox, centroid)`` for a WKT string, or all-``None`` tuples on failure."""
    try:
        polygon = wkt.loads(wkt_str)
        bbox = polygon.bounds  # (minx, miny, maxx, maxy)
        centroid = polygon.centroid.coords[0]
    except Exception:
        # Best effort: an unparsable or empty geometry still produces a row.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        return (None, None, None, None), (None, None)
    return bbox, centroid


def extract_metadata(image_dir, label_dir, output_csv="full_metadata.csv"):
    """Build one DataFrame row per labelled feature found in ``label_dir``.

    Every file in ``image_dir`` is matched to the ``.json`` file in
    ``label_dir`` that has the same name without extension. Files with no
    matching label (hidden files, sidecars, unlabelled images) are skipped,
    so a stray file can no longer shift the image/label pairing.

    Parameters
    ----------
    image_dir : str
        Directory containing the image files.
    label_dir : str
        Directory containing the JSON label files. Features are read from
        ``label_data["features"]["lng_lat"]``.
    output_csv : str, optional
        Currently unused; kept so existing calls keep working. This function
        does not write any file -- the caller saves the returned frame.

    Returns
    -------
    pandas.DataFrame
        Columns: image_filename, label_filename, stage, disaster_type,
        feature_type, subtype, uid, wkt, bbox_minx, bbox_miny, bbox_maxx,
        bbox_maxy, centroid_x, centroid_y. Rows follow sorted image filename
        order. The columns are present even when no rows were produced.
    """
    columns = (
        "image_filename", "label_filename", "stage", "disaster_type",
        "feature_type", "subtype", "uid", "wkt",
        "bbox_minx", "bbox_miny", "bbox_maxx", "bbox_maxy",
        "centroid_x", "centroid_y",
    )

    # Index labels by stem so pairing is by name, not by position in a listing.
    labels_by_stem = {
        os.path.splitext(name)[0]: name
        for name in os.listdir(label_dir)
        if name.lower().endswith(".json")
    }

    data = []

    for img_name in sorted(os.listdir(image_dir)):
        label_name = labels_by_stem.get(os.path.splitext(img_name)[0])
        if label_name is None:
            continue

        disaster_type, stage = _parse_image_name(img_name)

        label_path = os.path.join(label_dir, label_name)
        with open(label_path, 'r', encoding="utf-8") as f:
            label_data = json.load(f)

        # `or` guards against an explicit null for "features" or "lng_lat".
        features = (label_data.get("features") or {}).get("lng_lat") or []
        for feature in features:
            props = feature.get("properties", {})
            wkt_str = feature.get("wkt", "")
            bbox, centroid = _summarize_geometry(wkt_str)

            data.append({
                "image_filename": img_name,
                "label_filename": label_name,
                "stage": stage,
                "disaster_type": disaster_type,
                "feature_type": props.get("feature_type", ""),
                "subtype": props.get("subtype", ""),
                "uid": props.get("uid", ""),
                "wkt": wkt_str,
                "bbox_minx": bbox[0],
                "bbox_miny": bbox[1],
                "bbox_maxx": bbox[2],
                "bbox_maxy": bbox[3],
                "centroid_x": centroid[0],
                "centroid_y": centroid[1]
            })

    # Passing the columns keeps the schema stable when `data` is empty.
    df = pd.DataFrame(data, columns=list(columns))
    return df

In [8]:
# Build the per-feature metadata table from the configured image/label folders.
df = extract_metadata(image_dir=image_dir, label_dir=label_dir)
# Plain-text preview of the first five rows.
print(df.head(n=5))

# Save the full table; index=False leaves the positional row index out of the file.
df.to_csv(path_or_buf="hold_metadata.csv", index=False)

                                 image_filename  \
0  guatemala-volcano_00000004_post_disaster.tif   
1  guatemala-volcano_00000004_post_disaster.tif   
2  guatemala-volcano_00000004_post_disaster.tif   
3  guatemala-volcano_00000004_post_disaster.tif   
4  guatemala-volcano_00000004_post_disaster.tif   

                                  label_filename stage disaster_type  \
0  guatemala-volcano_00000004_post_disaster.json  post       volcano   
1  guatemala-volcano_00000004_post_disaster.json  post       volcano   
2  guatemala-volcano_00000004_post_disaster.json  post       volcano   
3  guatemala-volcano_00000004_post_disaster.json  post       volcano   
4  guatemala-volcano_00000004_post_disaster.json  post       volcano   

  feature_type       subtype                                   uid  \
0     building  major-damage  104d384e-412b-4a72-8361-3c6c75cafdf0   
1     building  major-damage  9badd81e-16bf-4406-b8ad-9cb50a208011   
2     building  major-damage  ea0cde31-1b8c-4951-a