# Extract Polygon Spectra from ENVI Files in Subdirectories
This notebook:
- Searches each subdirectory for a file ending with a specific suffix (e.g., `envi_resample_MicaSense.img`)
- Extracts reflectance values for each polygon in a GeoJSON file
- Checks and reprojects CRS if needed
- Saves the result as a CSV file named after the subdirectory

In [1]:
import os
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio
import rasterio.mask
from shapely.geometry import box, mapping

## Function to check and reproject polygons to raster CRS

In [2]:
def check_and_reproject(geojson_path, raster_path):
    """Load the polygons and make sure they are in the raster's CRS.

    Parameters
    ----------
    geojson_path : str
        Path to the vector file read with ``geopandas.read_file``.
    raster_path : str
        Path to the raster opened with ``rasterio.open``; only its CRS is read.

    Returns
    -------
    geopandas.GeoDataFrame
        The polygons, reprojected to the raster CRS when the two differ.

    Raises
    ------
    ValueError
        If either the polygons or the raster have no CRS defined, because a
        reprojection between them cannot be computed.
    """
    polygons = gpd.read_file(geojson_path)
    with rasterio.open(raster_path) as src:
        raster_crs = src.crs

    # Fail early with an explicit message instead of printing
    # "Reprojecting ... from None" and erroring inside to_crs().
    if polygons.crs is None:
        raise ValueError(f"GeoJSON has no CRS defined: {geojson_path}")
    if raster_crs is None:
        raise ValueError(f"Raster has no CRS defined: {raster_path}")

    if polygons.crs != raster_crs:
        print(f"🔄 Reprojecting GeoJSON from {polygons.crs} to {raster_crs}")
        polygons = polygons.to_crs(raster_crs)
    else:
        print("✅ GeoJSON and Raster have the same CRS")
    return polygons

## Function to extract reflectance values inside each polygon

In [3]:
def extract_pixel_reflectance(geojson_path, raster_path, output_csv):
    """Write one CSV row per valid raster pixel that falls inside a polygon.

    The polygons are reprojected to the raster CRS (via ``check_and_reproject``),
    filtered to those intersecting the raster's bounding box, and each one is
    used to mask/crop the raster. Every pixel of the cropped window that has at
    least one band value greater than zero becomes a row holding the polygon
    id, a per-polygon pixel id, the pixel coordinates and one column per band.

    Parameters
    ----------
    geojson_path : str
        Path to the polygon file.
    raster_path : str
        Path to the raster to sample.
    output_csv : str
        Destination CSV path.

    Returns
    -------
    None
        Nothing is written when no polygon intersects the raster extent.

    Notes
    -----
    * ``X_Coordinate`` / ``Y_Coordinate`` are computed from the cropped window's
      affine origin and pixel size only, i.e. they are the upper-left corner of
      each pixel (not its centre) and ignore any rotation terms.
    * Band columns are named from the raster band descriptions (spaces replaced
      by underscores), falling back to ``Band_<n>`` when a description is missing.
    """
    polygons = check_and_reproject(geojson_path, raster_path)

    extracted_data = []
    # Open the raster once and reuse the handle for every polygon instead of
    # reopening the file inside the loop.
    with rasterio.open(raster_path) as src:
        wavelengths = [
            w.replace(" ", "_") if w else f"Band_{i+1}"
            for i, w in enumerate(src.descriptions)
        ]
        raster_extent = box(*src.bounds)
        polygons = polygons[polygons.geometry.intersects(raster_extent)]
        if polygons.empty:
            print("❌ No polygons found within the raster extent!")
            return
        print(f"✅ {len(polygons)} polygons found within raster extent.")

        for poly_idx, polygon in polygons.iterrows():
            polygon_id = polygon.get("OBJECTID", poly_idx)
            try:
                out_image, out_transform = rasterio.mask.mask(
                    src, [mapping(polygon.geometry)], crop=True
                )
            except ValueError as err:
                # With crop=True rasterio raises ValueError when the shape does
                # not overlap the raster (the bounding-box pre-filter above
                # cannot rule out every such case). Skip that polygon rather
                # than aborting the whole extraction.
                print(f"⚠️ Skipping polygon {polygon_id}: {err}")
                continue

            out_image = out_image.astype(np.float32)
            rows, cols = out_image.shape[1], out_image.shape[2]
            # Affine coefficients: [0]=pixel width, [2]=x origin,
            # [4]=pixel height (usually negative), [5]=y origin.
            x_coords = np.arange(cols) * out_transform[0] + out_transform[2]
            y_coords = np.arange(rows) * out_transform[4] + out_transform[5]

            # A pixel is kept when any band is > 0; pixels outside the polygon
            # are filled by rasterio.mask (nodata, or 0 when nodata is unset),
            # so this is what drops them — assuming nodata is not positive.
            has_signal = (out_image > 0).any(axis=0)
            # np.argwhere yields (row, col) pairs in row-major order, which is
            # the same order the nested row/column loops would visit them.
            for pixel_id, (i, j) in enumerate(np.argwhere(has_signal)):
                reflectance_values = out_image[:, i, j]
                row = {
                    "Polygon_ID": polygon_id,
                    "Pixel_ID": f"{polygon_id}_{pixel_id}",
                    "X_Coordinate": x_coords[j],
                    "Y_Coordinate": y_coords[i],
                    **dict(zip(wavelengths, reflectance_values)),
                }
                extracted_data.append(row)

    if extracted_data:
        df = pd.DataFrame(extracted_data)
    else:
        # Keep the header so downstream readers still see the expected columns
        # even when no pixel passed the filter.
        print("⚠️ No valid pixels found inside the polygons; writing header-only CSV.")
        df = pd.DataFrame(
            columns=["Polygon_ID", "Pixel_ID", "X_Coordinate", "Y_Coordinate", *wavelengths]
        )
    df.to_csv(output_csv, index=False)
    print(f"📂 Reflectance data saved to: {output_csv}")

## Function to loop through subdirectories and extract spectra

In [6]:
def run_extraction_for_envi_folders(geojson_path, parent_dir, output_dir, target_suffix="envi_resample_MicaSense.img"):
    """Run the polygon spectra extraction for every folder holding a target raster.

    ``parent_dir`` is walked recursively. In each directory that contains at
    least one file whose name ends with ``target_suffix``, the first such file
    (in sorted order) is passed to ``extract_pixel_reflectance`` and the result
    is written to ``<output_dir>/<directory name>_spectra.csv``.

    Parameters
    ----------
    geojson_path : str
        Path to the polygon file forwarded to ``extract_pixel_reflectance``.
    parent_dir : str
        Root directory to search.
    output_dir : str
        Directory for the CSV files; created if it does not exist.
    target_suffix : str, optional
        Filename suffix identifying the raster to process in each directory.

    Notes
    -----
    The CSV name uses only the last path component of the directory, so two
    directories with the same name at different depths write to the same file.
    """
    os.makedirs(output_dir, exist_ok=True)
    for subdir, dirs, files in os.walk(parent_dir):
        # Sorting in place makes the traversal order reproducible across runs
        # and file systems (os.walk honours in-place edits of `dirs`).
        dirs.sort()
        # Sorted so that "the first one" is a deterministic choice.
        matching_files = sorted(f for f in files if f.endswith(target_suffix))
        if not matching_files:
            continue
        if len(matching_files) > 1:
            print(f"⚠️ Multiple files with target suffix in {subdir}, using the first one.")
        raster_path = os.path.join(subdir, matching_files[0])
        # abspath strips a trailing separator (and resolves "."), so a match in
        # `parent_dir` itself still yields a non-empty directory name.
        subdir_name = os.path.basename(os.path.abspath(subdir))
        output_csv = os.path.join(output_dir, f"{subdir_name}_spectra.csv")
        print(f"\n🔍 Processing: {raster_path}")
        extract_pixel_reflectance(geojson_path, raster_path, output_csv)

## Run the extraction

In [9]:
# Inputs: polygon file, folder holding one sub-folder per flight line, and
# the folder that receives one CSV per flight line.
geojson_path = "/data-store/iplant/home/shared/earthlab/macrosystems/field-data/analysis_ready_polygons/aop_polygons_1_24_2025_analysis_ready.geojson"
parent_raster_dir = "/data-store/iplant/home/shared/earthlab/macrosystems/processed_flight_lines/NIWO_2023_07"
output_csv_dir = "/home/jovyan/data-store/cross-sensor-cal/drone_cor_sup/out_csv/NIWO_poly_spectra"

# Batch run over every folder containing a raster with the given suffix.
run_extraction_for_envi_folders(geojson_path, parent_raster_dir, output_csv_dir, "envi_resample_MicaSense.img")


🔍 Processing: /data-store/iplant/home/shared/earthlab/macrosystems/processed_flight_lines/NIWO_2023_07/NEON_D13_NIWO_DP1_L003-1_20230815_directional_reflectance/NEON_D13_NIWO_DP1_L003-1_20230815_directional_reflectance__envi_resample_MicaSense.img
🔄 Reprojecting GeoJSON from EPSG:5070 to EPSG:32613
✅ 626 polygons found within raster extent.
📂 Reflectance data saved to: /home/jovyan/data-store/cross-sensor-cal/drone_cor_sup/out_csv/NIWO_poly_spectra/NEON_D13_NIWO_DP1_L003-1_20230815_directional_reflectance_spectra.csv

🔍 Processing: /data-store/iplant/home/shared/earthlab/macrosystems/processed_flight_lines/NIWO_2023_07/NEON_D13_NIWO_DP1_L019-1_20230815_directional_reflectance/NEON_D13_NIWO_DP1_L019-1_20230815_directional_reflectance__envi_resample_MicaSense.img
🔄 Reprojecting GeoJSON from EPSG:5070 to EPSG:32613
✅ 231 polygons found within raster extent.
📂 Reflectance data saved to: /home/jovyan/data-store/cross-sensor-cal/drone_cor_sup/out_csv/NIWO_poly_spectra/NEON_D13_NIWO_DP1_L019