In [1]:
import geopandas as gpd
import rasterio
import rasterio.mask
import numpy as np
import pandas as pd
from shapely.geometry import box
from shapely.geometry import mapping

def check_and_reproject(geojson_path, raster_path):
    """
    Checks if the GeoJSON polygons and the ENVI raster have the same CRS.
    If not, reprojects the polygons to match the raster's CRS.
    """
    # Load GeoJSON
    polygons = gpd.read_file(geojson_path)

    # Load raster CRS
    with rasterio.open(raster_path) as src:
        raster_crs = src.crs

    # Check if CRS matches
    if polygons.crs != raster_crs:
        print(f"🔄 Reprojecting GeoJSON from {polygons.crs} to {raster_crs}")
        polygons = polygons.to_crs(raster_crs)
    else:
        print("✅ GeoJSON and Raster have the same CRS")

    return polygons

def extract_pixel_reflectance(geojson_path, raster_path, output_csv):
    """
    Extracts reflectance values for each pixel inside each polygon and saves to CSV.
    """
    # Reproject GeoJSON if needed
    polygons = check_and_reproject(geojson_path, raster_path)

    # Open raster and get metadata
    with rasterio.open(raster_path) as src:
        wavelengths = src.descriptions  # Get actual wavelengths (e.g., ["444nm", "475nm", ...])
        # # Get band descriptions or assign default names if descriptions are missing
        # if src.descriptions and all(src.descriptions):
        #     wavelengths = src.descriptions  # Use actual descriptions if available
        # else:
        #     wavelengths = [f"Band_{i+1}" for i in range(src.count)]  # Assign default names
        
        raster_crs = src.crs
        raster_bounds = src.bounds
        raster_transform = src.transform  # Affine transform for pixel-to-geo mapping

    # Convert wavelengths to valid column names
    wavelengths = [w.replace(" ", "_") for w in wavelengths]  # Remove spaces if any

    # Convert raster bounds to a polygon
    raster_extent = box(*raster_bounds)

    # Filter polygons that are within the raster extent
    polygons = polygons[polygons.geometry.intersects(raster_extent)]

    # If no polygons remain, exit
    if polygons.empty:
        print("❌ No polygons found within the raster extent!")
        return

    print(f"✅ {len(polygons)} polygons found within raster extent.")

    # Prepare a list to store extracted data
    extracted_data = []

    # Loop through each polygon
    for poly_idx, polygon in polygons.iterrows():
        polygon_id = polygon.get("OBJECTID", poly_idx)  # Use an ID field if available

        # Mask the raster to extract pixel values within the polygon
        with rasterio.open(raster_path) as src:
            out_image, out_transform = rasterio.mask.mask(src, [mapping(polygon.geometry)], crop=True)
            out_image = out_image.astype(np.float32)  # Convert to float for precision

            # Get pixel coordinates
            num_rows, num_cols = out_image.shape[1], out_image.shape[2]
            x_coords = np.arange(num_cols) * out_transform[0] + out_transform[2]
            y_coords = np.arange(num_rows) * out_transform[4] + out_transform[5]

            # Loop over each pixel
            pixel_id = 0
            for i in range(num_rows):
                for j in range(num_cols):
                    # Extract reflectance for all bands
                    reflectance_values = out_image[:, i, j]

                    # Check if pixel is valid (not nodata)
                    if np.any(reflectance_values > 0):  # Assuming negative values are NoData
                        row = {
                            "Polygon_ID": polygon_id,
                            "Pixel_ID": f"{polygon_id}_{pixel_id}",  # Unique ID for each pixel
                            "X_Coordinate": x_coords[j],
                            "Y_Coordinate": y_coords[i],
                            **{wavelengths[b]: reflectance_values[b] for b in range(len(wavelengths))}
                        }
                        extracted_data.append(row)
                        pixel_id += 1  # Increment pixel counter

    # Convert to DataFrame
    df = pd.DataFrame(extracted_data)

    # Save to CSV
    df.to_csv(output_csv, index=False)
    print(f"📂 Reflectance data saved to: {output_csv}")


    # # 🔥 PLOTTING: Raster + Overlapping Polygons
    # print("📊 Plotting raster and polygons...")

    # fig, ax = plt.subplots(figsize=(10, 8))

    # # Plot raster (use first band)
    # with rasterio.open(raster_path) as src:
    #     rasterio.plot.show(src.read(1), transform=src.transform, ax=ax, cmap="gray")

    # # Plot polygons
    # polygons.boundary.plot(ax=ax, edgecolor="red", linewidth=1.5)

    # # Label polygons with IDs
    # for idx, row in polygons.iterrows():
    #     x, y = row.geometry.centroid.x, row.geometry.centroid.y
    #     ax.text(x, y, str(row["OBJECTID"]), fontsize=12, color="blue", weight="bold", ha="center")

    # ax.set_title("Raster with Overlapping Polygons")
    # plt.show()





In [2]:
import os
import glob

def process_all_rasters(base_dir, geojson_path, output_dir):
    """
    Recursively search for all *orthomosaic.tif files in base_dir and run extract_pixel_reflectance on each.
    Output CSV will include both the subdirectory and raster name.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Find all matching TIFF files recursively
    tiff_files = glob.glob(os.path.join(base_dir, '**', '*orthomosaic.tif'), recursive=True)

    if not tiff_files:
        print("❌ No TIFF files found in the directory.")
        return

    print(f"📁 Found {len(tiff_files)} TIFF files.")

    for raster_path in tiff_files:
        raster_name = os.path.splitext(os.path.basename(raster_path))[0]
        subdir_name = os.path.basename(os.path.dirname(raster_path))

        # Combine subdirectory and raster name for unique CSV naming
        output_csv_name = f"{subdir_name}_{raster_name}_10cm_half_diam_before_correct_spectra.csv"
        output_csv = os.path.join(output_dir, output_csv_name)

        print(f"\n🔍 Processing: {raster_path}")
        extract_pixel_reflectance(geojson_path, raster_path, output_csv)



In [3]:
# Folder containing folders of TIFFs
base_raster_dir = "/data-store/iplant/home/shared/earthlab/macrosystems/field-data/output/summer-2024-10cm-10k"
# Path to your GeoJSON
geojson_file = "/data-store/iplant/home/shared/earthlab/macrosystems/field-data/analysis_ready_polygons/uas_polygons_1_24_2025_analysis_ready_half_diam.geojson"
# Where to save CSVs
output_csv_dir = "/home/jovyan/data-store/cross-sensor-cal/drone_cor_sup/out_csv/UAS_2024"

process_all_rasters(base_raster_dir, geojson_file, output_csv_dir)


📁 Found 26 TIFF files.

🔍 Processing: /data-store/iplant/home/shared/earthlab/macrosystems/field-data/output/summer-2024-10cm-10k/beartoothManual_1-07-29-24-ExportPackage/aligned_orthomosaic.tif
✅ GeoJSON and Raster have the same CRS
✅ 43 polygons found within raster extent.
📂 Reflectance data saved to: /home/jovyan/data-store/cross-sensor-cal/drone_cor_sup/out_csv/UAS_2024/beartoothManual_1-07-29-24-ExportPackage_aligned_orthomosaic_10cm_half_diam_before_correct_spectra.csv

🔍 Processing: /data-store/iplant/home/shared/earthlab/macrosystems/field-data/output/summer-2024-10cm-10k/beartoothManual_2-07-29-24-ExportPackage/aligned_orthomosaic.tif
✅ GeoJSON and Raster have the same CRS
✅ 53 polygons found within raster extent.
📂 Reflectance data saved to: /home/jovyan/data-store/cross-sensor-cal/drone_cor_sup/out_csv/UAS_2024/beartoothManual_2-07-29-24-ExportPackage_aligned_orthomosaic_10cm_half_diam_before_correct_spectra.csv

🔍 Processing: /data-store/iplant/home/shared/earthlab/macrosys

RasterioIOError: Read or write failed. /data-store/iplant/home/shared/earthlab/macrosystems/field-data/output/summer-2024-10cm-10k/sistersRanger_2-06-28-24-ExportPackage/aligned_orthomosaic.tif, band 1: IReadBlock failed at X offset 0, Y offset 303: TIFFReadEncodedStrip() failed.