In [15]:
# Inputs:
# Plan HDF file to inspect. The main script reads the 2D flow area perimeter,
# the projection and the terrain file reference from this file.
hdf_file_path = r'E:\HECRAS_2D_12070205\BLE_12070205_Engineering_Models\Engineering Models\Hydraulic Models\RAS_Submittal\LBSG_501\Input\BLE_LBSG_501.p02.hdf'

# Specify the path where you want to save the GeoPackage file
# (the last cell writes the DEM limits to it as layer '00_terrain_dems')
output_path = r'E:\sample_2d_output\terrain_dem_limits.gpkg'

In [2]:
# Last revised - 2024.04.23

import h5py
import os
from shapely.geometry import Polygon, MultiPolygon, shape
import geopandas as gpd
import pandas as pd
import rasterio
from rasterio.features import shapes
import numpy as np
from osgeo import gdal
import math

In [3]:
# ------------------------
def fn_get_group_names(hdf5_file_path, group_path):
    """
    List the sub-groups that sit directly beneath a group of an HDF5 file.

    Parameters:
    hdf5_file_path (str): Location of the HDF5 file on disk.
    group_path (str): HDF5 path of the parent group to inspect.

    Returns:
    list or None: Names of the members of ``group_path`` that are HDF5 groups
                  (members of any other kind are skipped).
                  None when ``group_path`` is not present in the file, and
                  also None when any exception is raised while reading (the
                  exception is printed, not re-raised).
    """
    try:
        with h5py.File(hdf5_file_path, 'r') as hdf_file:
            # Guard clause: nothing to list when the parent group is missing
            if group_path not in hdf_file:
                print(f"Group '{group_path}' not found in the HDF5 file.")
                return None

            parent_group = hdf_file[group_path]

            # Keep only the members that are themselves groups
            subgroup_names = []
            for member_name in parent_group:
                if isinstance(parent_group[member_name], h5py.Group):
                    subgroup_names.append(member_name)

            return subgroup_names
    except Exception as e:
        # Best-effort: report the problem and fall through (returns None)
        print(f"An error occurred: {e}")
# ------------------------

In [13]:
# ------------------------
def get_gdf_of_2d_area(hdf_file_path):
    """
    Build a one-row GeoDataFrame with the perimeter polygon of a 2D flow area.

    The groups under '/Geometry/2D Flow Areas/' are listed with
    fn_get_group_names. When several are present only the first one is used
    (a notice is printed).

    Parameters:
    hdf_file_path (str): Path to the HDF file that holds the geometry.

    Returns:
    GeoDataFrame or None: A single-row GeoDataFrame whose geometry is the
        perimeter polygon, with the columns 'area_2d_name', 'hdf_path',
        'terrain_path' and 'prj_wkt_ras_model'. The CRS is taken from the
        file's 'Projection' attribute.
        None when no 2D area could be listed (no groups found, the parent
        group is missing, or the file could not be read).
    """

    def _as_text(attr_value):
        # h5py may hand back a string attribute either as bytes or as str,
        # depending on how it was stored; accept both.
        if isinstance(attr_value, bytes):
            return attr_value.decode('utf-8')
        return str(attr_value)

    # Specify the HDF5 group path that contains the 2D flow areas
    str_hdf_geom_path = '/Geometry/2D Flow Areas/'

    # Get names of HDF5 Group objects in the specified group
    list_group_names = fn_get_group_names(hdf_file_path, str_hdf_geom_path)

    # fn_get_group_names returns None on a missing group / read error and an
    # empty list when the group has no sub-groups; neither can be processed.
    if not list_group_names:
        print('Error: No 2D areas found')
        return None

    if len(list_group_names) > 1:
        print('Multiple 2D areas found -- Using first area:', list_group_names[0])

    str_area_name = list_group_names[0]
    str_perimeter_points = str_hdf_geom_path + str_area_name + '/' + 'Perimeter'

    # Open the HDF file and pull everything needed before it is closed
    with h5py.File(hdf_file_path, 'r') as hdf_file:
        arr_perim_points = hdf_file[str_perimeter_points][:]

        # Extract the projection
        projection_wkt = _as_text(hdf_file.attrs['Projection'])

        str_terrain_filename = _as_text(hdf_file['/Geometry/'].attrs['Terrain Filename'])

    # Convert the array of perimeter points into a Polygon
    shp_polygon_geom = Polygon(arr_perim_points)

    # Create a GeoDataFrame
    gdf_2d_area_polygon = gpd.GeoDataFrame(index=[0], crs=projection_wkt, geometry=[shp_polygon_geom])

    gdf_2d_area_polygon['area_2d_name'] = str_area_name
    gdf_2d_area_polygon['hdf_path'] = hdf_file_path
    gdf_2d_area_polygon['terrain_path'] = str_terrain_filename
    gdf_2d_area_polygon['prj_wkt_ras_model'] = projection_wkt

    return gdf_2d_area_polygon
# ------------------------

In [5]:
# -------------
def fn_make_raster_limits_gdf(str_input_raster_filepath, flt_minvalue):
    """
    Outline the part of a raster that holds valid data.

    A cell of band 1 counts as valid when its value is greater than
    flt_minvalue and it is not flagged as nodata by the dataset mask.
    All valid cells are polygonised and merged into one geometry.

    Parameters:
    str_input_raster_filepath (str): Path of the raster to read with rasterio.
    flt_minvalue (float): Cells at or below this value are treated as null.

    Returns:
    GeoDataFrame: One row whose geometry is the union of all valid-cell
        polygons, in the CRS of the raster.
    """

    with rasterio.open(str_input_raster_filepath) as src:
        # masked=True exposes the dataset's nodata mask alongside the values
        band = src.read(1, masked=True)

        # Valid = above the threshold AND not nodata. Working on the raw data
        # plus the explicit mask keeps the result a plain boolean array.
        arr_valid = (np.ma.getdata(band) > flt_minvalue) & ~np.ma.getmaskarray(band)

        # 'binary' raster: valid = 1, null = 255 (uint8 is accepted by shapes)
        arr_binary = np.where(arr_valid, 1, 255).astype('uint8')

        # Polygonise directly from the array; only cells where the mask is
        # True produce features, so no temporary raster file is needed.
        shapes_gen = shapes(arr_binary, mask=arr_valid, transform=src.transform)

        # Convert shapes to Shapely geometries while the dataset is still open
        geometries = [shape(geom) for geom, _ in shapes_gen]
        raster_crs = src.crs

    gdf = gpd.GeoDataFrame(geometry=geometries, crs=raster_crs)

    # Merge all polygons into a single (Multi)Polygon
    merged_polygon = gdf['geometry'].unary_union

    # Create a new GeoDataFrame with the merged geometry
    gdf_raster_limits = gpd.GeoDataFrame(geometry=[merged_polygon], crs=gdf.crs)

    return gdf_raster_limits
# -------------

In [6]:
# ---------------
def fn_raster_metadata_as_dict(str_raster_path):
    """
    Collect basic metadata of a raster (first band) using gdal.

    Parameters:
    str_raster_path (str): Path of the raster to open.

    Returns:
    dict: With the keys
        'resolution'  - absolute value of the pixel width from the geotransform
        'horiz_units' - name inside the last UNIT["..."] entry of the
                        projection string
        'vert_type'   - 'TYPE' item of the band 1 metadata (None if absent)
        'vert_units'  - 'UNITS' item of the band 1 metadata (None if absent)
        'crs'         - projection string as returned by GetProjectionRef()

    Raises:
    RuntimeError: If gdal cannot open the raster.
    """

    # Open the raster dataset; gdal.Open returns None on failure when
    # gdal exceptions are not enabled, so check before using it.
    dataset = gdal.Open(str_raster_path)
    if dataset is None:
        raise RuntimeError(f'Unable to open raster: {str_raster_path}')

    # Get the resolution (pixel width) of the raster
    flt_res = abs(dataset.GetGeoTransform()[1])

    # Get the CRS string once and reuse it
    str_crs = dataset.GetProjectionRef()

    # Get the horizontal units: text between 'UNIT["' and the next quote,
    # taken from the last UNIT entry of the projection string
    str_horizontal_units = str_crs.split('UNIT["')[-1].split('"')[0]

    # Get the metadata for the first band
    band = dataset.GetRasterBand(1)
    metadata = band.GetMetadata()

    # Create a dictionary of metadata (TYPE / UNITS default to None)
    dict_metadata_band1 = {
        "resolution": flt_res,
        "horiz_units": str_horizontal_units,
        "vert_type": metadata.get('TYPE'),
        "vert_units": metadata.get('UNITS'),
        "crs": str_crs
    }

    # Drop the references so gdal can close the dataset
    band = None
    dataset = None

    return dict_metadata_band1
# ---------------

In [12]:
# Main Script
# Locates the terrain HDF referenced by the plan file, lists the DEMs it
# contains, and builds one GeoDataFrame row per DEM (valid-data outline,
# file path, priority and raster metadata).

gdf_2d_area_polygon = get_gdf_of_2d_area(hdf_file_path)

# get_gdf_of_2d_area returns None when no 2D area is available
if gdf_2d_area_polygon is None:
    raise RuntimeError(f'No 2D flow area could be read from: {hdf_file_path}')

str_terrain_path = gdf_2d_area_polygon.iloc[0]['terrain_path']

if os.path.isabs(str_terrain_path):
    print(f'Terrain path is absolute: {str_terrain_path}')
    str_hdf_terrain_full_path = str_terrain_path
else:
    print(f'Terrain path is relative: {str_terrain_path}')
    directory, filename = os.path.split(hdf_file_path)

    # Resolve against the folder of the plan file. normpath collapses the
    # redundant '.' component and also resolves '..' components, which a
    # plain text replacement of '.\' would corrupt.
    str_hdf_terrain_full_path = os.path.normpath(os.path.join(directory, str_terrain_path))

if os.path.exists(str_hdf_terrain_full_path):
    print(f"Terrain file found: {str_hdf_terrain_full_path}")

    # Read the terrain file
    group_path_terrain = '/Terrain/'
    list_terrains = fn_get_group_names(str_hdf_terrain_full_path, group_path_terrain)

    print(f'DEMs found {list_terrains}')

    # fn_get_group_names gives None on error and [] when nothing is listed
    if not list_terrains:
        raise RuntimeError(f'No DEMs listed in terrain file: {str_hdf_terrain_full_path}')

    # Determine the DEM file paths and priority
    list_dem_filepaths = []
    list_dem_priority = []

    # DEM file names are resolved against the folder of the terrain HDF
    str_terrain_directory = os.path.dirname(str_hdf_terrain_full_path)

    with h5py.File(str_hdf_terrain_full_path, 'r') as hdf_file:
        for item in list_terrains:
            str_path_in_hdf = '/Terrain/' + item + '/'
            terrain_attrs = hdf_file[str_path_in_hdf].attrs

            # The attribute may come back as bytes or str depending on storage
            raw_dem_filename = terrain_attrs['File']
            if isinstance(raw_dem_filename, bytes):
                str_dem_filename = raw_dem_filename.decode('utf-8')
            else:
                str_dem_filename = str(raw_dem_filename)

            int_priority = terrain_attrs['Priority']

            str_dem_filepath = os.path.normpath(os.path.join(str_terrain_directory, str_dem_filename))

            list_dem_filepaths.append(str_dem_filepath)
            list_dem_priority.append(int_priority)

    # ---
    # Process the DEMs into valid-data outlines plus metadata

    list_gdfs = []
    list_meta_dict = []
    flt_minvalue = -300.0 # lowest point in America is -282 feet

    for int_count, raster_file_path in enumerate(list_dem_filepaths, start=1):
        print(f'Processing raster {int_count} of {len(list_dem_filepaths)}...')
        gdf_raster_limits = fn_make_raster_limits_gdf(raster_file_path, flt_minvalue)
        list_gdfs.append(gdf_raster_limits)

        dict_raster_meta = fn_raster_metadata_as_dict(raster_file_path)
        list_meta_dict.append(dict_raster_meta)
    # ----

    # Concatenate list_gdfs into a single GeoDataFrame; ignore_index already
    # yields a fresh 0..n-1 index, so no separate reset is needed.
    # The CRS of the first DEM is applied to the combined frame.
    gdf_terrain_dem_limits = gpd.GeoDataFrame(pd.concat(list_gdfs, ignore_index=True), crs=list_gdfs[0].crs)

    gdf_terrain_dem_limits['raster_filepath'] = list_dem_filepaths
    gdf_terrain_dem_limits['priority'] = list_dem_priority

    # Convert list of dictionaries to DataFrame (same order as the DEM list)
    df_raster_meta = pd.DataFrame(list_meta_dict)

    # Perform left join on index values
    gdf_terrain_dem_limits_with_meta = gdf_terrain_dem_limits.merge(df_raster_meta,
                                                                    left_index=True,
                                                                    right_index=True,
                                                                    how='left')

    # add the plan input file that was used to determine these data
    gdf_terrain_dem_limits_with_meta['plan_hdf'] = hdf_file_path

    print('Complete')

else:
    print(f"ERROR: Terrain file does not exist: {str_hdf_terrain_full_path}")

Terrain path is relative: .\Terrain\Terrain4.hdf
Terrain file found: E:\HECRAS_2D_12070205\BLE_12070205_Engineering_Models\Engineering Models\Hydraulic Models\RAS_Submittal\LBSG_501\Input\Terrain\Terrain4.hdf
DEMs found ['Terrain4.DEM_1', 'Terrain4.DEM_2', 'Terrain4.DEM_3']
Processing raster 1 of 3...
Processing raster 2 of 3...
Processing raster 3 of 3...
Complete


In [11]:
# Show the DEM limit polygons on an interactive map for a visual check
gdf_terrain_dem_limits_with_meta.explore()

In [16]:
# Create the destination folder first so the write does not fail when it is missing
os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)

# Save the DEM limits (with metadata) as a layer of the GeoPackage
gdf_terrain_dem_limits_with_meta.to_file(output_path, layer='00_terrain_dems', driver="GPKG")