In [None]:
import rasterio
from rasterio.plot import reshape_as_image
from PIL import Image
import numpy as np
import os
import glob

# Folder searched for source GeoTIFFs, and folder that receives the PNG copies.
# Both are relative paths, so they resolve against the kernel's working directory.
input_dir = "../data/geotiffs/hold/images/"
output_dir = "images_png/"
# Create the output folder up front; exist_ok=True keeps re-running this cell harmless.
os.makedirs(output_dir, exist_ok=True)

def normalize(array):
    """Linearly rescale an array to the 0-255 range and return it as uint8.

    The smallest value in ``array`` maps to 0 and the largest to 255. One
    global min/max is taken over every element, so all bands of a multi-band
    image share the same scale. Scaled values are truncated (not rounded) by
    the final uint8 cast.

    Args:
        array: Numeric NumPy array (or array-like) of any shape.

    Returns:
        A ``np.uint8`` array with the same shape as ``array``. A constant
        input (max == min) or an empty input yields all zeros.
    """
    array = np.asarray(array)

    # .min()/.max() raise on zero-size arrays; there is nothing to scale anyway.
    if array.size == 0:
        return np.zeros(array.shape, dtype=np.uint8)

    # Promote integer/bool data to float64 before subtracting. In the native
    # dtype, `array - min` and `max - min` wrap around whenever the value span
    # exceeds the dtype's range (e.g. int16 data covering -32768..32767).
    if array.dtype.kind in "iub":
        array = array.astype(np.float64)

    array_min, array_max = array.min(), array.max()
    value_range = array_max - array_min
    if value_range == 0:
        # Flat image: avoid dividing by zero and return black.
        return np.zeros(array.shape, dtype=np.uint8)

    norm_array = (array - array_min) / value_range * 255
    return norm_array.astype(np.uint8)

# Filename suffixes that tie a pre-disaster image to its post-disaster twin.
PRE_SUFFIX = "_pre_disaster.tif"
POST_SUFFIX = "_post_disaster.tif"
# Only a small sample of pairs is converted.
MAX_PAIRS = 10

# Get sorted list of pre-disaster files
pre_files = sorted(glob.glob(os.path.join(input_dir, "*" + PRE_SUFFIX)))

# Take the first MAX_PAIRS pre-disaster files and find matching post-disaster files
for pre_file in pre_files[:MAX_PAIRS]:
    # Swap only the trailing suffix. str.replace() on the whole path would also
    # rewrite a matching directory name, and would leave the path unchanged
    # if the on-disk suffix differs in case.
    post_file = pre_file[: -len(PRE_SUFFIX)] + POST_SUFFIX
    pair = [pre_file, post_file]

    for tif_path in pair:
        if not os.path.exists(tif_path):
            print(f"Skipping missing file: {tif_path}")
            continue

        with rasterio.open(tif_path) as src:
            if src.count >= 3:
                # Use the first three bands as the colour channels and move the
                # band axis last, which is the layout PIL expects.
                img_array = reshape_as_image(src.read([1, 2, 3]))
            else:
                # Fewer than three bands: fall back to band 1 as a single-channel image.
                img_array = src.read(1)

        # Stretch this image's own min..max onto 0-255 so it can be stored as 8-bit.
        img_array = normalize(img_array)
        img = Image.fromarray(img_array)

        # Build the PNG name from the file stem, so only the real extension is
        # replaced rather than every ".tif" substring in the name.
        stem = os.path.splitext(os.path.basename(tif_path))[0]
        png_path = os.path.join(output_dir, stem + ".png")
        img.save(png_path)
        print(f"Converted {tif_path} → {png_path}")



Converted data/geotiffs/hold/images/guatemala-volcano_00000004_pre_disaster.tif → images_png/guatemala-volcano_00000004_pre_disaster.png
Converted data/geotiffs/hold/images/guatemala-volcano_00000004_post_disaster.tif → images_png/guatemala-volcano_00000004_post_disaster.png
Converted data/geotiffs/hold/images/guatemala-volcano_00000012_pre_disaster.tif → images_png/guatemala-volcano_00000012_pre_disaster.png
Converted data/geotiffs/hold/images/guatemala-volcano_00000012_post_disaster.tif → images_png/guatemala-volcano_00000012_post_disaster.png
Converted data/geotiffs/hold/images/guatemala-volcano_00000014_pre_disaster.tif → images_png/guatemala-volcano_00000014_pre_disaster.png
Converted data/geotiffs/hold/images/guatemala-volcano_00000014_post_disaster.tif → images_png/guatemala-volcano_00000014_post_disaster.png
Converted data/geotiffs/hold/images/guatemala-volcano_00000020_pre_disaster.tif → images_png/guatemala-volcano_00000020_pre_disaster.png
Converted data/geotiffs/hold/images

In [5]:
import os

base_dir = "../data/geotiffs/"
subfolders = ["hold", "tier1", "tier3", "test"]

# For each dataset split, report how many GeoTIFFs it contains and their combined size.
for folder in subfolders:
    images_path = os.path.join(base_dir, folder, "images")

    if os.path.exists(images_path):
        # The extension match is case-insensitive, so ".TIF" files are counted too.
        tif_files = list(
            filter(lambda name: name.lower().endswith(".tif"), os.listdir(images_path))
        )
        tif_paths = [os.path.join(images_path, name) for name in tif_files]
        total_size = sum(map(os.path.getsize, tif_paths))

        print(f"{folder}:")
        print(f"  Number of .tif files: {len(tif_files)}")
        # Bytes are divided by 1024**2 for the report.
        print(f"  Total size: {total_size / (1024**2):.2f} MB\n")
    else:
        print(f"Folder not found: {images_path}")


hold:
  Number of .tif files: 1866
  Total size: 11218.87 MB

tier1:
  Number of .tif files: 5598
  Total size: 33656.61 MB

tier3:
  Number of .tif files: 12738
  Total size: 76584.12 MB

test:
  Number of .tif files: 1866
  Total size: 11218.87 MB



In [7]:
import os
import glob

base_dir = "../data/geotiffs/"
subfolders = ["hold", "tier1", "tier3", "test"]

# For each dataset split, count the pre-disaster images that have a post-disaster twin.
for folder in subfolders:
    images_path = os.path.join(base_dir, folder, "images")

    if os.path.exists(images_path):
        # Every pre-disaster image in this split.
        pre_files = glob.glob(os.path.join(images_path, "*_pre_disaster.tif"))

        # A pair exists when the same name with the post-disaster suffix is on disk.
        pair_count = sum(
            os.path.exists(path.replace("_pre_disaster.tif", "_post_disaster.tif"))
            for path in pre_files
        )

        print(f"{folder}:")
        print(f"  Number of pre/post pairs: {pair_count}")
        print(f"  Total pre files: {len(pre_files)}\n")
    else:
        print(f"Folder not found: {images_path}")


hold:
  Number of pre/post pairs: 933
  Total pre files: 933

tier1:
  Number of pre/post pairs: 2799
  Total pre files: 2799

tier3:
  Number of pre/post pairs: 6369
  Total pre files: 6369

test:
  Number of pre/post pairs: 933
  Total pre files: 933



In [1]:
import os
import glob

folder_path = "chips_post/hold/"

# Collect every PNG in the folder whose filename begins with 'guatemala-volcano'.
pattern = os.path.join(folder_path, "guatemala-volcano*.png")
files = glob.glob(pattern)

print(f"Number of Guatemala volcano PNGs: {len(files)}")

Number of Guatemala volcano PNGs: 206
