In [1]:
import os
import rasterio
from shapely.geometry import mapping, box
import geopandas as gpd
import pandas as pd
from shapely import wkt
from shapely.ops import transform
from datetime import datetime
from pyproj import Transformer
import os
from rasterio.mask import mask
import numpy as np
from PIL import Image

def _tiff_footprints_latlon(tiff_folder):
    """Return ``(file_name, lon/lat bounding box)`` for each TIFF in *tiff_folder*."""
    footprints = []
    for tiff_file in os.listdir(tiff_folder):
        if not tiff_file.endswith(('.tif', '.tiff')):
            continue

        with rasterio.open(os.path.join(tiff_folder, tiff_file)) as src:
            tiff_crs = src.crs
            left, bottom, right, top = src.bounds

        # Convert from the raster's own CRS to lon/lat (EPSG:4326)
        transformer = Transformer.from_crs(tiff_crs, "EPSG:4326", always_xy=True)

        # Reproject all four corners: a rectangle in a projected CRS is not
        # axis-aligned in lon/lat, so two opposite corners alone can
        # under-cover the tile. The envelope of the four corners cannot.
        lons, lats = transformer.transform(
            [left, left, right, right],
            [bottom, top, top, bottom],
        )
        footprints.append(
            (tiff_file, box(min(lons), min(lats), max(lons), max(lats)))
        )
    return footprints


def check_polygon_within_tiff_bounds(csv_path, tiff_folder):
    """Find, for every polygon in a CSV, the TIFF files whose bounds it intersects.

    Args:
        csv_path: Path to a CSV file with a ``plotID`` column and a
            ``geometry_wkt`` column holding WKT geometries. The geometries are
            compared against EPSG:4326 boxes, so they are expected to be in
            lon/lat order.
        tiff_folder: Folder whose ``.tif`` / ``.tiff`` files are checked.

    Returns:
        A dict mapping each ``plotID`` to the list of TIFF file names (in
        ``os.listdir`` order) whose lon/lat bounding box intersects the
        polygon. Polygons with no match map to an empty list.
    """
    # Read the CSV file and convert the WKT strings to Shapely geometries
    df = pd.read_csv(csv_path)
    geometries = df['geometry_wkt'].apply(wkt.loads)

    # Open every raster exactly once instead of once per polygon
    footprints = _tiff_footprints_latlon(tiff_folder)

    result = {}
    for polygon_id, polygon_geom in zip(df['plotID'], geometries):
        result[polygon_id] = [
            tiff_file
            for tiff_file, footprint in footprints
            if polygon_geom.intersects(footprint)
        ]

    return result

def display_first_tiff_info(tiff_dir):
    """Print CRS, bounds and metadata of the first GeoTIFF listed in *tiff_dir*.

    "First" means the first ``.tif`` / ``.tiff`` entry in ``os.listdir`` order.
    If the directory contains no such file, a message is printed and the
    function returns without opening anything.
    """
    # Keep only the entries that look like GeoTIFF files
    candidates = [
        name for name in os.listdir(tiff_dir)
        if name.endswith(('.tif', '.tiff'))
    ]

    if len(candidates) == 0:
        print("No GeoTIFF files found in the directory.")
        return

    first_name = candidates[0]

    with rasterio.open(os.path.join(tiff_dir, first_name)) as dataset:
        # Pull out everything we want to report while the file is open
        crs = dataset.crs
        bounds = dataset.bounds
        metadata = dataset.meta

        print(f"GeoTIFF file: {first_name}")
        print(f"Coordinate Reference System (CRS): {crs}")
        print(f"Bounds: {bounds}")

        # Report whether coordinates are angular (lat/lon) or projected
        if crs.is_geographic:
            print("The CRS is geographic (likely in lat/lon).")
        elif crs.is_projected:
            print("The CRS is projected (likely UTM or another projected system).")

        print(f"Metadata: {metadata}")

def _to_rgb_uint8(bands):
    """Build an ``(H, W, 3)`` uint8 array from a ``(bands, H, W)`` raster array."""
    if bands.shape[0] >= 3:
        # Use the first three bands as R, G, B; extra bands (e.g. alpha) are dropped
        rgb = np.moveaxis(bands[:3], 0, -1)
    else:
        # Fewer than three bands: repeat the first band as grayscale
        rgb = np.stack([bands[0]] * 3, axis=-1)

    # Values are clamped (not rescaled) into the 8-bit range
    return np.clip(rgb, 0, 255).astype(np.uint8)


def _pad_to_min_size(image, min_width=100, min_height=100):
    """Center *image* on a black canvas at least ``min_width`` x ``min_height``."""
    width, height = image.size
    canvas_width = max(width, min_width)
    canvas_height = max(height, min_height)

    canvas = Image.new('RGB', (canvas_width, canvas_height), (0, 0, 0))
    canvas.paste(
        image,
        ((canvas_width - width) // 2, (canvas_height - height) // 2),
    )
    return canvas


def clip_polygons_to_tiffs(polygon_csv, polygon_tiff_matches, tiff_folder, output_folder):
    """Clip rasters to polygons and save each clip as a padded JPEG and a GeoTIFF.

    Args:
        polygon_csv: CSV file with ``plotID`` and ``geometry_wkt`` columns; the
            geometries are treated as EPSG:4326 (lon/lat).
        polygon_tiff_matches: Mapping of ``plotID`` to the TIFF file names
            (relative to *tiff_folder*) that should be clipped with it.
        tiff_folder: Folder containing the source TIFF files.
        output_folder: Folder that receives ``<plotID>_<tiff>.jpg``,
            ``<plotID>_<tiff>`` (GeoTIFF) and ``clipped_jpegs_info.csv``.
            Created if it does not exist.

    Each polygon is reprojected into the CRS of the raster it is clipped
    against, so rasters with differing CRSs are handled. Polygon/raster pairs
    that do not overlap are reported and skipped.
    """
    # Read the CSV file and convert the WKT strings to Shapely geometries
    df = pd.read_csv(polygon_csv)
    df['geometry'] = df['geometry_wkt'].apply(wkt.loads)
    gdf = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')  # Assume lat/lon CRS

    os.makedirs(output_folder, exist_ok=True)

    # One Transformer per distinct raster CRS, created lazily
    transformers = {}

    # Rows for the summary CSV
    csv_data = []

    for polygon_id, tiff_files in polygon_tiff_matches.items():
        print(f"Processing polygon {polygon_id}...")

        matching = gdf.loc[gdf['plotID'] == polygon_id, 'geometry']
        if matching.empty:
            print(f"Polygon {polygon_id} not found in {polygon_csv}; skipping.")
            continue
        polygon_geom = matching.iloc[0]

        # Reprojected copies of this polygon, keyed by raster CRS string
        reprojected = {}

        for tiff_file in tiff_files:
            tiff_path = os.path.join(tiff_folder, tiff_file)
            print(f"Clipping with TIFF file: {tiff_file}")

            with rasterio.open(tiff_path) as src:
                if src.crs is None:
                    print(f"TIFF file {tiff_file} has no CRS; skipping.")
                    continue

                crs_key = src.crs.to_string()
                if crs_key not in reprojected:
                    if crs_key not in transformers:
                        transformers[crs_key] = Transformer.from_crs(
                            "EPSG:4326", crs_key, always_xy=True
                        )
                    # Passing the bound method directly also supports 3D coordinates
                    reprojected[crs_key] = transform(
                        transformers[crs_key].transform, polygon_geom
                    )
                transformed_polygon_geom = reprojected[crs_key]

                # Cheap bounding-box rejection before the real clip
                minx, miny, maxx, maxy = transformed_polygon_geom.bounds
                tiff_bounds = src.bounds
                if (minx > tiff_bounds.right or
                        maxx < tiff_bounds.left or
                        miny > tiff_bounds.top or
                        maxy < tiff_bounds.bottom):
                    print(f"Polygon {polygon_id} is out of bounds of TIFF file {tiff_file}.")
                    continue

                # Clip the raster using the transformed polygon; rasterio raises
                # ValueError when the shape does not overlap the raster at all
                try:
                    out_image, out_transform = mask(
                        src, [mapping(transformed_polygon_geom)], crop=True
                    )
                except ValueError:
                    print(f"Polygon {polygon_id} does not overlap with TIFF file {tiff_file}.")
                    continue

                # Handle cases where no pixels are left after clipping
                if out_image.shape[1] == 0 or out_image.shape[2] == 0:
                    print(f"Polygon {polygon_id} does not overlap with TIFF file {tiff_file}.")
                    continue

                clipped_image = Image.fromarray(_to_rgb_uint8(out_image))
                padded_image = _pad_to_min_size(clipped_image)

                # Define output filenames
                jpeg_filename = f"{polygon_id}_{os.path.basename(tiff_file)}.jpg"
                tif_filename = f"{polygon_id}_{os.path.basename(tiff_file)}"
                jpeg_path = os.path.join(output_folder, jpeg_filename)
                tif_path = os.path.join(output_folder, tif_filename)
                padded_image.save(jpeg_path, 'JPEG')

                # Save the unpadded clip as a GeoTIFF with its georeferencing
                with rasterio.open(
                    tif_path,
                    'w',
                    driver='GTiff',
                    height=out_image.shape[1],
                    width=out_image.shape[2],
                    count=out_image.shape[0],
                    dtype=out_image.dtype,
                    crs=src.crs,
                    transform=out_transform,
                ) as dst:
                    dst.write(out_image)

                # Record the geometry in the CRS of the raster actually used
                csv_data.append({
                    'polygon_id': polygon_id,
                    'jpeg_filename': jpeg_filename,
                    'tiff_file': tiff_file,
                    'geometry': transformed_polygon_geom.wkt,
                    'crs': crs_key,
                })

                print(f"Saved clipped raster for polygon {polygon_id} to {jpeg_path}")

    # Save CSV data to file
    csv_df = pd.DataFrame(csv_data)
    csv_df.to_csv(os.path.join(output_folder, 'clipped_jpegs_info.csv'), index=False)

    print("Clipping and JPEG creation completed. CSV file saved.")

In [4]:
def get_date_subfolder():
    """Return today's local date formatted as ``YYYY_MM_DD``."""
    return f"{datetime.now():%Y_%m_%d}"

# Input/output locations for this run (machine-specific absolute paths).
# shapefile_path = "C:/Users/allen/OneDrive/Desktop/Work/Scripts/Tree Segmentation/data/TEAK_baseplots_utm_shapefiles/TEAK_baseplots_utm.shp"
tiff_folder = 'C:/Users/allen/OneDrive/Desktop/Work/data/orthophotos/TEAK/2023/NEON_images-camera-ortho-mosaic/NEON.D17.TEAK.DP3.30010.001.2023-07.basic.20240905T014815Z.PROVISIONAL'
output_path = 'C:/Users/allen/OneDrive/Desktop/Work/Scripts/Tree Segmentation/output/'

# The polygon CSV is looked up in a sub-folder named after today's date
date_subfolder = get_date_subfolder()
polygon_csv = os.path.join(output_path, date_subfolder, "shapefile_geometries.csv")

# Map every plotID to the TIFF tiles it intersects
polygon_tiff_matches = check_polygon_within_tiff_bounds(polygon_csv, tiff_folder)

# Same mapping, ordered by plotID for readable output
sorted_dict = {
    key: polygon_tiff_matches[key] for key in sorted(polygon_tiff_matches)
}

# Report the matches, one polygon per line
for plot_id, tiffs in sorted_dict.items():
    print(f"Polygon {plot_id} is within the bounds of TIFF files: {', '.join(tiffs)}")

Polygon TEAK_001 is within the bounds of TIFF files: 2023_TEAK_6_320000_4094000_image.tif
Polygon TEAK_002 is within the bounds of TIFF files: 2023_TEAK_6_318000_4094000_image.tif
Polygon TEAK_003 is within the bounds of TIFF files: 2023_TEAK_6_321000_4098000_image.tif
Polygon TEAK_005 is within the bounds of TIFF files: 2023_TEAK_6_323000_4103000_image.tif
Polygon TEAK_006 is within the bounds of TIFF files: 2023_TEAK_6_316000_4094000_image.tif
Polygon TEAK_007 is within the bounds of TIFF files: 2023_TEAK_6_317000_4096000_image.tif
Polygon TEAK_010 is within the bounds of TIFF files: 2023_TEAK_6_317000_4098000_image.tif
Polygon TEAK_011 is within the bounds of TIFF files: 2023_TEAK_6_321000_4097000_image.tif
Polygon TEAK_012 is within the bounds of TIFF files: 2023_TEAK_6_321000_4098000_image.tif
Polygon TEAK_013 is within the bounds of TIFF files: 2023_TEAK_6_318000_4093000_image.tif, 2023_TEAK_6_318000_4094000_image.tif
Polygon TEAK_014 is within the bounds of TIFF files: 2023_TEAK

In [5]:
# Sanity check: print CRS, bounds and metadata of one tile in tiff_folder
display_first_tiff_info(tiff_folder)

GeoTIFF file: 2023_TEAK_6_312000_4090000_image.tif
Coordinate Reference System (CRS): EPSG:32611
Bounds: BoundingBox(left=312000.0, bottom=4090000.0, right=313000.0, top=4091000.0)
The CRS is projected (likely UTM or another projected system).
Metadata: {'driver': 'GTiff', 'dtype': 'uint8', 'nodata': None, 'width': 10000, 'height': 10000, 'count': 3, 'crs': CRS.from_epsg(32611), 'transform': Affine(0.1, 0.0, 312000.0,
       0.0, -0.1, 4091000.0)}


In [None]:
# Clip every matched polygon/tile pair; outputs go to a "clipped_tif"
# sub-folder created inside tiff_folder itself
clip_polygons_to_tiffs(polygon_csv, polygon_tiff_matches, tiff_folder, os.path.join(tiff_folder, "clipped_tif"))

Processing polygon TEAK_001...
Clipping with TIFF file: 2023_TEAK_6_320000_4094000_image.tif
Saved clipped raster for polygon TEAK_001 to C:/Users/allen/OneDrive/Desktop/Work/data/orthophotos/TEAK/2023/NEON_images-camera-ortho-mosaic/NEON.D17.TEAK.DP3.30010.001.2023-07.basic.20240905T014815Z.PROVISIONAL\clipped_tif\TEAK_001_2023_TEAK_6_320000_4094000_image.tif.jpg
Processing polygon TEAK_016...
Clipping with TIFF file: 2023_TEAK_6_321000_4100000_image.tif
Saved clipped raster for polygon TEAK_016 to C:/Users/allen/OneDrive/Desktop/Work/data/orthophotos/TEAK/2023/NEON_images-camera-ortho-mosaic/NEON.D17.TEAK.DP3.30010.001.2023-07.basic.20240905T014815Z.PROVISIONAL\clipped_tif\TEAK_016_2023_TEAK_6_321000_4100000_image.tif.jpg
Processing polygon TEAK_003...
Clipping with TIFF file: 2023_TEAK_6_321000_4098000_image.tif
Saved clipped raster for polygon TEAK_003 to C:/Users/allen/OneDrive/Desktop/Work/data/orthophotos/TEAK/2023/NEON_images-camera-ortho-mosaic/NEON.D17.TEAK.DP3.30010.001.2023

In [None]:

def clip_and_merge_tifs_by_polygon(shapefile_path, tiff_folder_path, output_folder_path):
    """Clip every TIFF to every polygon, then mosaic the clips per polygon.

    Args:
        shapefile_path: Vector file readable by ``geopandas.read_file``; each
            feature needs a ``plotID`` attribute. NOTE(review): the polygons
            are compared with raster bounds without reprojection, so they are
            assumed to share the rasters' CRS -- confirm for your data.
        tiff_folder_path: Folder whose ``.tif`` files are clipped.
        output_folder_path: Folder that receives ``<plotID>_<tiff>`` clips and
            one ``<plotID>_merged.tif`` mosaic per polygon that produced at
            least one clip. Created if it does not exist.

    Errors for an individual polygon/raster pair, or for an individual merge,
    are printed and do not stop the remaining work.
    """
    # Local imports: neither name is provided by the file's import block
    from contextlib import ExitStack
    from rasterio.merge import merge

    os.makedirs(output_folder_path, exist_ok=True)

    # Load the shapefile
    gdf = gpd.read_file(shapefile_path)

    # List inputs once, up front, so clips written below are never picked up
    # as inputs when the output folder is the same as the input folder
    tiff_names = [
        name for name in os.listdir(tiff_folder_path) if name.endswith('.tif')
    ]

    # plotID -> paths of the clipped TIFF files produced for that polygon
    polygon_clipped_files = {}

    for _, row in gdf.iterrows():
        polygon = row['geometry']
        plot_id = row['plotID']  # Assuming each polygon has a unique plotID
        polygon_clipped_files[plot_id] = []

        for tiff_file in tiff_names:
            tiff_path = os.path.join(tiff_folder_path, tiff_file)

            # Best-effort batch: report a failing pair and carry on
            try:
                with rasterio.open(tiff_path) as src:
                    if not polygon.intersects(box(*src.bounds)):
                        print(f"{tiff_file} does not intersect with the polygon {plot_id}.")
                        continue

                    # Clip all bands at once; the result is (bands, rows, cols)
                    out_image, out_transform = mask(src, [mapping(polygon)], crop=True)

                    # An all-zero clip carries no data worth writing
                    if not out_image.any():
                        print(f"{tiff_file} has no data for polygon {plot_id}.")
                        continue

                    out_meta = src.meta.copy()

                out_meta.update({
                    "driver": "GTiff",
                    "count": out_image.shape[0],
                    "height": out_image.shape[1],
                    "width": out_image.shape[2],
                    "transform": out_transform,
                })

                clipped_tiff_path = os.path.join(output_folder_path, f"{plot_id}_{tiff_file}")
                with rasterio.open(clipped_tiff_path, "w", **out_meta) as dest:
                    dest.write(out_image)

                polygon_clipped_files[plot_id].append(clipped_tiff_path)
            except Exception as e:
                print(f"Error processing {tiff_file} with polygon {plot_id}: {e}")

    # Merge the clipped TIFF files of each polygon into one mosaic
    for plot_id, clipped_paths in polygon_clipped_files.items():
        if not clipped_paths:
            continue  # Skip polygons with no clipped TIFF files

        try:
            with ExitStack() as stack:
                # merge() works on open datasets, not on raw band arrays
                sources = [
                    stack.enter_context(rasterio.open(path))
                    for path in clipped_paths
                ]
                merged_image, merged_transform = merge(sources)
                # Read the metadata while the datasets are still open
                merged_meta = sources[0].meta.copy()

            merged_meta.update({
                "driver": "GTiff",
                "count": merged_image.shape[0],
                "height": merged_image.shape[1],
                "width": merged_image.shape[2],
                "transform": merged_transform,
            })

            merged_tiff_path = os.path.join(output_folder_path, f"{plot_id}_merged.tif")
            with rasterio.open(merged_tiff_path, "w", **merged_meta) as dest:
                dest.write(merged_image)
        except Exception as e:
            print(f"Error merging clipped files for polygon {plot_id}: {e}")

    print(f"Clipping and merging complete. Output saved to {output_folder_path}")
