# 1) Cropping GeoTIFF Images to a Shapefile Area

    This code defines a function, `crop_tiffs_to_shp`, that automates cropping GeoTIFF images to the boundary defined by a shapefile and saves the cropped images to a specified output directory. To help with large spatial datasets, the function invokes Python's garbage collector, and the script that calls it reports how long the whole process took.

In [None]:
import geopandas as gpd
import os
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
import time
import gc

def crop_tiffs_to_shp(input_folder, output_folder, shapefile_path):
    """Crop every GeoTIFF in a folder to the geometries of a shapefile.

    Each file directly inside ``input_folder`` whose name ends in ``.tif``
    or ``.tiff`` (case-insensitive) is masked and cropped to the shapefile's
    geometries, then written under the same file name to ``output_folder``.
    The shapefile is reprojected to each raster's CRS before masking.

    Args:
        input_folder: Directory containing the source GeoTIFF files.
        output_folder: Directory receiving the cropped files. It is created
            if it does not exist yet.
        shapefile_path: Path of the vector file holding the boundary
            geometries. All non-empty geometries are used as the mask.

    Raises:
        ValueError: If the shapefile contains no non-empty geometry.

    Note:
        rasterio's ``mask`` is documented to raise ``ValueError`` when the
        shapes do not overlap a raster and ``crop=True``; that error is not
        caught here and aborts the run.
    """
    boundaries = gpd.read_file(shapefile_path)

    # Fail early with a clear message instead of an opaque lookup error
    # once the first raster is already open.
    if not any(geom is not None and not geom.is_empty
               for geom in boundaries.geometry):
        raise ValueError(
            f"No usable geometry found in shapefile: {shapefile_path}"
        )

    # rasterio does not create missing directories when writing.
    os.makedirs(output_folder, exist_ok=True)

    # Reprojected mask shapes are reused while consecutive rasters share the
    # same CRS, so the shapefile is not reprojected once per file.
    cached_crs = None
    shapes = None

    # sorted() makes the processing order deterministic across platforms.
    for filename in sorted(os.listdir(input_folder)):
        if not filename.lower().endswith(('.tif', '.tiff')):
            continue

        tiff_path = os.path.join(input_folder, filename)

        with rasterio.open(tiff_path) as src:
            if shapes is None or src.crs != cached_crs:
                reprojected = boundaries.to_crs(src.crs)
                # Use every feature, not only the first one, so multi-feature
                # boundary files are clipped to their full extent.
                shapes = [
                    mapping(geom)
                    for geom in reprojected.geometry
                    if geom is not None and not geom.is_empty
                ]
                cached_crs = src.crs

            out_image, out_transform = mask(src, shapes, crop=True)
            out_meta = src.meta.copy()

        # The source is closed before writing, so an output path that equals
        # the input path is not written while still open for reading.
        out_meta.update({
            "driver": "GTiff",
            "height": out_image.shape[1],
            "width": out_image.shape[2],
            "transform": out_transform
        })

        output_tiff_path = os.path.join(output_folder, filename)
        with rasterio.open(output_tiff_path, "w", **out_meta) as dest:
            dest.write(out_image)

        # Drop the (potentially large) cropped array before loading the next
        # raster, then collect to keep peak memory down between files.
        del out_image
        gc.collect()
                    
# Locations of the source mosaics, the clipped outputs and the boundary file.
input_folder_path = '/path/to/mosaic/'
output_folder_path = '/path/to/save/mosaic_clip/'
shapefile_path = '/path/to/area_boundaries/boundaries.shp'

# NOTE: time.process_time() reports CPU time of this process, not wall-clock
# time, so time spent waiting on disk I/O is not included in the figure below.
start = time.process_time()
crop_tiffs_to_shp(
    input_folder=input_folder_path,
    output_folder=output_folder_path,
    shapefile_path=shapefile_path,
)

print("Clipping completed. Images saved in:", output_folder_path)
elapsed_minutes = (time.process_time() - start) / 60
print("Processing time [min]", elapsed_minutes)