# In [9]:  (Jupyter notebook cell prompt left over from export)
import os
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
import shutil

# Root of the source tree that holds the TIFF rasters to be clipped
input_folder = "/efs/Lerma/Thetasat"

# Destination root; clipped TIFFs are written here, mirroring the source layout
output_folder = "/efs/Lerma/lerma_data/Thetasat"

# Shapefile that supplies the clipping geometry
shapefile_path = "/efs/Lerma/Shapefile_lerma/santiago-lerma river basin_BB.shp"

# Function to create the same folder structure in the output directory
def create_output_folder_structure(input_dir, output_dir):
    """Mirror the directory tree found under ``input_dir`` into ``output_dir``.

    Only directories are created; no files are copied. Existing output
    directories are left untouched.

    Args:
        input_dir: Root of the tree whose sub-directory layout is replicated.
        output_dir: Root under which the same relative layout is created.
    """
    # Create the root itself so files located directly in ``input_dir`` have
    # a destination even when the tree contains no sub-directories.
    os.makedirs(output_dir, exist_ok=True)

    for root, dirs, _files in os.walk(input_dir):
        # Work with the path relative to the input root instead of doing a
        # string replace: ``str.replace`` rewrites *every* occurrence of
        # ``input_dir`` in the path, not just the leading prefix.
        relative_root = os.path.relpath(root, input_dir)
        for dir_name in dirs:
            output_subfolder = os.path.normpath(
                os.path.join(output_dir, relative_root, dir_name)
            )
            os.makedirs(output_subfolder, exist_ok=True)

def _mirror_path(path, input_dir, output_dir):
    """Map ``path`` under ``input_dir`` to the same relative spot in ``output_dir``."""
    return os.path.normpath(
        os.path.join(output_dir, os.path.relpath(path, input_dir))
    )


def _clip_shapes(clip_feature, raster_crs):
    """Return the clip geometry as GeoJSON-like dicts, in the raster CRS if known."""
    # Reprojection is only possible when both sides declare a CRS; otherwise
    # the geometry is used unchanged, as before.
    if clip_feature.crs is not None and raster_crs:
        clip_feature = clip_feature.to_crs(raster_crs.to_wkt())
    return [mapping(clip_feature.iloc[0].geometry)]


# Clip TIFFs to the specified shapefile and save them in the output folder
def clip_tiffs(input_dir, output_dir, shapefile_path):
    """Clip every TIFF under ``input_dir`` to a shapefile and save the results.

    The directory layout of ``input_dir`` is mirrored under ``output_dir`` and
    each ``.tif``/``.tiff`` file (case-insensitive) is written to the matching
    relative location as an LZW-compressed GeoTIFF.

    Only the first feature of the shapefile is used as the clipping geometry.
    It is reprojected to each raster's CRS when both declare one, so a
    shapefile stored in a different CRS than the rasters still clips the
    intended area.

    Args:
        input_dir: Root folder that is searched recursively for TIFF files.
        output_dir: Root folder that receives the clipped TIFF files.
        shapefile_path: Path to the shapefile holding the clipping geometry.

    Raises:
        IndexError: If the shapefile contains no features.
    """
    create_output_folder_structure(input_dir, output_dir)
    shapefile = gpd.read_file(shapefile_path)

    # Select the clipping feature once; it does not change between rasters.
    clip_feature = shapefile.iloc[[0]]

    for root, _dirs, files in os.walk(input_dir):
        for file_name in files:
            if not file_name.lower().endswith((".tif", ".tiff")):
                continue

            input_tif_path = os.path.join(root, file_name)
            # Build the destination from the relative path; a plain string
            # replace would rewrite every occurrence of ``input_dir``.
            output_tif_path = _mirror_path(input_tif_path, input_dir, output_dir)

            with rasterio.open(input_tif_path) as src:
                out_image, out_transform = mask(
                    src,
                    _clip_shapes(clip_feature, src.crs),
                    crop=True,
                    filled=False,
                )
                out_meta = src.meta.copy()

            # out_image is indexed (band, row, column).
            out_meta.update(
                {
                    "driver": "GTiff",
                    "height": out_image.shape[1],
                    "width": out_image.shape[2],
                    "transform": out_transform,
                    'compress': 'LZW',
                }
            )

            # Guarantee the destination folder exists, including the output
            # root for TIFFs that sit directly in ``input_dir``.
            os.makedirs(os.path.dirname(output_tif_path), exist_ok=True)

            with rasterio.open(output_tif_path, "w", **out_meta) as dst:
                dst.write(out_image)

# Run the clipping job only when this file is executed as a script
if __name__ == "__main__":
    clip_tiffs(
        input_dir=input_folder,
        output_dir=output_folder,
        shapefile_path=shapefile_path,
    )
