# Polygon Spectra Extraction with Polygon Attribute Merge and Raster Plotting

In [8]:
import os
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio
import rasterio.mask
import matplotlib.pyplot as plt
import rasterio.plot
from shapely.geometry import box, mapping
from concurrent.futures import ProcessPoolExecutor, as_completed
import logging

In [9]:
# Send INFO-and-above records to extraction_log.txt, each stamped with its
# timestamp and level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    filename='extraction_log.txt',
)

## Define functions

In [10]:
def check_and_reproject(geojson_path, raster_path):
    """Read the polygon file and return it in the raster's CRS.

    Args:
        geojson_path: Path handed to ``gpd.read_file``.
        raster_path: Path handed to ``rasterio.open``; only its ``crs`` is read.

    Returns:
        The polygons as read when their CRS already equals the raster's,
        otherwise the result of ``to_crs`` on the raster CRS.
    """
    features = gpd.read_file(geojson_path)

    # The dataset is opened only long enough to read its CRS.
    with rasterio.open(raster_path) as raster:
        target_crs = raster.crs

    needs_reprojection = features.crs != target_crs
    if needs_reprojection:
        logging.info("Reprojecting polygons to match raster CRS.")
        features = features.to_crs(target_crs)

    return features

def _pixels_to_frame(out_image, out_transform, polygon_id, band_names):
    """Build the per-pixel table for one masked raster crop.

    A pixel is kept when at least one of its band values is > 0. Pixels are
    emitted in row-major order and numbered from 0 within the polygon.

    Args:
        out_image: Array indexed as (band, row, col), as returned by
            ``rasterio.mask.mask``.
        out_transform: Affine transform of the crop; only the scale and
            offset terms (indices 0, 2, 4, 5) are used, so coordinates are
            ``col * a + c`` / ``row * e + f`` with rotation terms ignored.
        polygon_id: Identifier written to ``Polygon_ID`` and used as the
            ``Pixel_ID`` prefix.
        band_names: One column name per band, in band order.

    Returns:
        A DataFrame with one row per kept pixel, or ``None`` if no pixel
        qualifies.
    """
    keep = (out_image > 0).any(axis=0)
    # np.nonzero walks the mask in row-major order, matching a nested
    # "for row: for col:" scan, so Pixel_ID numbering is stable.
    row_idx, col_idx = np.nonzero(keep)
    n_pixels = row_idx.size
    if n_pixels == 0:
        return None

    columns = {
        "Polygon_ID": [polygon_id] * n_pixels,
        "Pixel_ID": [f"{polygon_id}_{n}" for n in range(n_pixels)],
        "X_Coordinate": col_idx * out_transform[0] + out_transform[2],
        "Y_Coordinate": row_idx * out_transform[4] + out_transform[5],
    }
    for band, name in enumerate(band_names):
        columns[name] = out_image[band, row_idx, col_idx]
    return pd.DataFrame(columns)


def _save_polygon_overview(raster_path, polygons, plot_path):
    """Save a PNG of raster band 1 with polygon outlines and ID labels."""
    with rasterio.open(raster_path) as src:
        fig, ax = plt.subplots(figsize=(10, 8))
        try:
            rasterio.plot.show(src.read(1), transform=src.transform, ax=ax, cmap='gray')
            polygons.boundary.plot(ax=ax, edgecolor='red', linewidth=1)
            for idx, row in polygons.iterrows():
                centroid = row.geometry.centroid
                pid = row.get("OBJECTID", idx)
                ax.text(centroid.x, centroid.y, str(pid), fontsize=9, color='blue', ha='center')
            fig.savefig(plot_path)
        finally:
            # Always release the figure, even when drawing or saving fails.
            plt.close(fig)


def extract_pixel_reflectance(geojson_path, raster_path, output_csv, plot_dir=None, plot_enabled=True):
    """Extract per-pixel band values inside each polygon and write them to CSV.

    Polygons are reprojected to the raster CRS (via ``check_and_reproject``),
    restricted to those intersecting the raster's bounding box, and each one
    is used to mask/crop the raster. Every pixel with at least one band
    value > 0 becomes a CSV row carrying the polygon ID, a pixel ID, the
    pixel coordinates, one column per band, and the polygon's non-geometry
    attributes (left-joined on ``Polygon_ID``).

    Args:
        geojson_path: Path to the polygon file.
        raster_path: Path to the raster to sample.
        output_csv: Destination CSV path.
        plot_dir: Directory for the overview PNG; created if missing.
        plot_enabled: A plot is written only when this is true and
            ``plot_dir`` is set.

    Returns:
        None. Nothing is written when no polygon intersects the raster or
        when no pixel passes the > 0 filter. Exceptions are logged (with
        traceback) rather than raised, so one bad raster does not abort a
        batch.
    """
    try:
        polygons = check_and_reproject(geojson_path, raster_path)
        frames = []
        # Open the raster once and reuse the handle for every polygon.
        with rasterio.open(raster_path) as src:
            wavelengths = [
                w.replace(" ", "_") if w else f"Band_{i+1}"
                for i, w in enumerate(src.descriptions)
            ]
            raster_extent = box(*src.bounds)
            polygons = polygons[polygons.geometry.intersects(raster_extent)]
            if polygons.empty:
                logging.warning(f"No polygons intersect with {raster_path}")
                return

            for poly_idx, polygon in polygons.iterrows():
                polygon_id = polygon.get("OBJECTID", poly_idx)
                try:
                    out_image, out_transform = rasterio.mask.mask(
                        src, [mapping(polygon.geometry)], crop=True)
                except ValueError as e:
                    # With crop=True, mask() raises ValueError for a shape that
                    # does not actually overlap the raster (e.g. it only touches
                    # the bounding box). Skip it instead of losing the raster.
                    logging.warning(f"Skipping polygon {polygon_id} for {raster_path}: {e}")
                    continue
                frame = _pixels_to_frame(
                    out_image.astype(np.float32), out_transform, polygon_id, wavelengths)
                if frame is not None:
                    frames.append(frame)

        if not frames:
            # Without this guard the merge below fails on a missing
            # "Polygon_ID" column and is reported as a generic error.
            logging.warning(f"No valid pixels extracted from {raster_path}")
            return

        df = pd.concat(frames, ignore_index=True)
        poly_attrs = polygons.copy()
        poly_attrs["Polygon_ID"] = poly_attrs.get("OBJECTID", poly_attrs.index)
        poly_attrs = poly_attrs.drop(columns="geometry")
        df = df.merge(poly_attrs, on="Polygon_ID", how="left")
        df.to_csv(output_csv, index=False)
        logging.info(f"Saved spectra to {output_csv}")

        if plot_enabled and plot_dir:
            # Plotting is best-effort: a failure here must not undo the CSV.
            try:
                os.makedirs(plot_dir, exist_ok=True)
                plot_name = os.path.splitext(os.path.basename(output_csv))[0] + '.png'
                _save_polygon_overview(raster_path, polygons, os.path.join(plot_dir, plot_name))
            except Exception as e:
                logging.warning(f"Could not plot for {raster_path}: {e}")
    except Exception as e:
        # Top-level boundary for a worker task: record the traceback and return.
        logging.exception(f"Failed on {raster_path}: {e}")

In [11]:
def batch_extract_polygon_spectra_parallel(geojson_path, raster_base_dir, output_dir,
                                           target_suffix="envi_resample_MicaSense.hdr",
                                           filters=None, max_workers=4, plot_enabled=True):
    """Run ``extract_pixel_reflectance`` on every matching raster in parallel.

    Every path under ``raster_base_dir`` (searched recursively) whose name
    ends with ``target_suffix`` is submitted to a process pool. Each task
    writes ``<parent dir>_<raster name>_uas_poly_AOP_spectra.csv`` into
    ``output_dir``.

    Args:
        geojson_path: Polygon file forwarded to each task.
        raster_base_dir: Root directory to search.
        output_dir: Directory for the CSVs; created if missing. Plots go to
            its ``plots`` subdirectory when ``plot_enabled`` is true.
        target_suffix: Required path ending.
        filters: Optional substring, or iterable of substrings, that must
            ALL occur in a path for it to be kept. A bare string is treated
            as one substring rather than iterated character by character.
        max_workers: Process-pool size.
        plot_enabled: Forwarded to each task.

    Returns:
        None. Prints a message and returns early when nothing matches.

    Raises:
        Whatever ``future.result()`` re-raises for a failed task.

    NOTE(review): the default suffix selects ``.hdr`` files and these paths
    are handed to ``rasterio.open`` as-is. GDAL's ENVI driver documentation
    says to open the binary data file rather than the header - confirm these
    paths open as intended.
    """
    # Function-scope imports kept from the original so the function does not
    # depend on the module-level import cell.
    import glob
    import logging
    import os
    from concurrent.futures import ProcessPoolExecutor, as_completed

    os.makedirs(output_dir, exist_ok=True)
    plot_dir = os.path.join(output_dir, 'plots') if plot_enabled else None

    # Recursively list everything, then keep paths with the requested ending.
    all_files = glob.glob(os.path.join(raster_base_dir, '**', '*'), recursive=True)
    matching_files = [f for f in all_files if f.endswith(target_suffix)]

    # Optional extra narrowing: every filter token must appear in the path.
    if filters:
        if isinstance(filters, str):
            # Iterating a str yields characters, which would match almost any
            # path; wrap it so it is tested as a single substring.
            filters = [filters]
        matching_files = [f for f in matching_files if all(k in f for k in filters)]

    if not matching_files:
        print("❌ No matching raster files found.")
        return

    print(f"📁 Found {len(matching_files)} raster files to process.")
    logging.info(f"Found {len(matching_files)} raster files.")

    tasks = []
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        for raster_path in matching_files:
            raster_name = os.path.splitext(os.path.basename(raster_path))[0]
            subdir_name = os.path.basename(os.path.dirname(raster_path))
            output_csv = os.path.join(output_dir, f"{subdir_name}_{raster_name}_uas_poly_AOP_spectra.csv")

            tasks.append(executor.submit(
                extract_pixel_reflectance, geojson_path, raster_path, output_csv, plot_dir, plot_enabled))

        # Collect results as they finish so a failed task surfaces here.
        for future in as_completed(tasks):
            future.result()


In [12]:
# Run the batch extraction for the NIWO 2023-08 flight lines.
batch_extract_polygon_spectra_parallel(
    # Inputs
    geojson_path=(
        "/data-store/iplant/home/shared/earthlab/macrosystems/field-data/"
        "analysis_ready_polygons/uas_polygons_1_24_2025_analysis_ready.geojson"
    ),
    raster_base_dir=(
        "/data-store/iplant/home/shared/earthlab/macrosystems/"
        "processed_flight_lines/NIWO_2023_08"
    ),
    target_suffix="envi_resample_MicaSense.hdr",
    # Optional path filtering, e.g. filters=["ABBY", "2021", "10cm"]
    # Outputs
    output_dir=(
        "/home/jovyan/data-store/cross-sensor-cal/drone_cor_sup/"
        "out_csv/NIWO_poly_spectra"
    ),
    plot_enabled=True,
    # Parallelism
    max_workers=6,
)


📁 Found 5 raster files to process.
