# Hydraulic Geometry Calculator

The following code takes the standard RivMapper reach polygons and clips and trims the Global Bankfull Discharge Dataset (GQBF) to each reach. Using GQBF, the ArcticDEM, NASADEM, the BASED stream depth estimator API, and standard Python geospatial libraries, the code extracts the median wetted channel width, median bankfull discharge, channel length, channel slope, and estimated bankfull channel depth for each reach. All metrics are written to a .csv, and a map of the elevation sampling points and the slope regression are exported as PNGs.

Global River BankFull Discharge (GQBF): 
Liu, Y., Wortmann, M., & Slater, L. (2024). Global River BankFull Discharge (GQBF) (0.1) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.13855371

NASADEM: https://developers.google.com/earth-engine/datasets/catalog/NASA_NASADEM_HGT_001

ArcticDEM: https://www.pgc.umn.edu/data/arcticdem/

Boost-Assisted Stream Estimator for Depth (BASED): https://github.com/jameshgrn/based_api

Author: James (Huck) Rees; PhD Student, UCSB Geography

Date: January 21, 2026

## Import packages

In [20]:
import requests
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pyproj
from pyproj import CRS, Transformer
import os
from shapely.ops import unary_union, split, linemerge
from shapely.geometry import LineString, Point
import xgboost as xgb
import time
import ee

# Initialize the Earth Engine client library; get_elevation() below issues
# ee.* requests and depends on this having run first.
# NOTE(review): presumably requires Earth Engine credentials to already be
# configured on this machine - confirm for your environment.
ee.Initialize()

## Initialize functions

In [21]:
def get_GQBF(river_name, reach_gdf, continent_abr, working_directory):
    """Return the trimmed GQBF segments for a river, or None if none exist.

    A per-river shapefile under ``<working_directory>/GQBF/Extracted_rivers``
    is read when it is already on disk; otherwise the segments are pulled
    from the continental GeoPackage with ``extract_GQBF``. Whatever was
    loaded is then passed through ``trim_GQBF``.

    Args:
        river_name: River identifier used for folder and file names.
        reach_gdf: GeoDataFrame of the river's reach polygons.
        continent_abr: Continent abbreviation forwarded to ``extract_GQBF``.
        working_directory: Root folder containing the ``GQBF`` directory.

    Returns:
        The result of ``trim_GQBF``, or None when no segments were loaded.
    """
    cached_shp = os.path.join(
        working_directory, 'GQBF', 'Extracted_rivers', river_name, f"{river_name}.shp"
    )

    # Prefer the cached extract; fall back to the (slower) GeoPackage clip.
    river_segments = (
        gpd.read_file(cached_shp)
        if os.path.isfile(cached_shp)
        else extract_GQBF(river_name, reach_gdf, continent_abr, working_directory)
    )

    if river_segments is None or river_segments.empty:
        return None
    return trim_GQBF(reach_gdf, river_segments)

def extract_GQBF(river_name, reach_gdf, continent_abr, working_directory):
    """Clip the continental GQBF GeoPackage to a river's reaches and cache it.

    The GeoPackage ``GQBFv0.1_reaches_<continent_abr>_EPSG4326.gpkg`` is read
    from ``<working_directory>/GQBF``, reprojected to EPSG:4326 if needed, and
    filtered to the segments intersecting the union of ``reach_gdf``. A
    non-empty selection is written to
    ``<working_directory>/GQBF/Extracted_rivers/<river_name>/<river_name>.shp``.

    Args:
        river_name: River identifier used for the output folder and file name.
        reach_gdf: GeoDataFrame of reach polygons (expected in EPSG:4326).
        continent_abr: Continent abbreviation embedded in the GeoPackage name.
        working_directory: Root folder containing the ``GQBF`` directory.

    Returns:
        GeoDataFrame of intersecting segments. When segments were found, the
        frame is re-read from the shapefile that was just written, so it has
        the same column names as the cached copy that ``get_GQBF`` loads on
        later runs. An empty GeoDataFrame is returned if nothing intersects.

    Raises:
        FileNotFoundError: If the continental GeoPackage does not exist.
        ValueError: If the GeoPackage has no CRS defined.
    """
    gpkg_filename = f"GQBFv0.1_reaches_{continent_abr}_EPSG4326.gpkg"
    gpkg_path = os.path.join(working_directory, 'GQBF', gpkg_filename)

    if not os.path.isfile(gpkg_path):
        raise FileNotFoundError(f"GeoPackage file not found: {gpkg_path}")

    gdf = gpd.read_file(gpkg_path)

    # Fail with a clear message instead of an AttributeError on `None.to_epsg()`.
    if gdf.crs is None:
        raise ValueError(f"GeoPackage does not have a CRS: {gpkg_path}")

    if gdf.crs.to_epsg() != 4326:
        gdf = gdf.to_crs(epsg=4326)

    # Build the reach footprint once rather than inside the filter expression.
    reach_footprint = reach_gdf.unary_union
    filtered_gdf = gdf[gdf.intersects(reach_footprint)]

    output_path = os.path.join(working_directory, 'GQBF', 'Extracted_rivers', river_name)
    os.makedirs(output_path, exist_ok=True)
    shapefile_path = os.path.join(output_path, f"{river_name}.shp")

    if filtered_gdf.empty:
        return filtered_gdf

    filtered_gdf.to_file(shapefile_path)

    # The shapefile format limits field names to 10 characters, so the file on
    # disk can carry different column names than the GeoPackage. Returning the
    # re-read shapefile keeps the first run consistent with every later run,
    # which loads this same file.
    return gpd.read_file(shapefile_path)

def get_reach(river_name, working_directory):
    """Load a river's reach shapefile and return it in EPSG:4326.

    The file is expected at
    ``<working_directory>/RiverMapping/Reaches/<river_name>/<river_name>.shp``.

    Args:
        river_name: River identifier used for the folder and file name.
        working_directory: Root folder containing ``RiverMapping``.

    Returns:
        GeoDataFrame of reaches, reprojected to EPSG:4326 when its CRS does
        not already resolve to that code.

    Raises:
        FileNotFoundError: If the reach shapefile does not exist.
        ValueError: If the shapefile has no CRS defined.
    """
    shp_path = os.path.join(
        working_directory, 'RiverMapping', 'Reaches', river_name, f"{river_name}.shp"
    )

    if os.path.isfile(shp_path):
        reaches = gpd.read_file(shp_path)
    else:
        raise FileNotFoundError(f"Reach shapefile not found: {shp_path}")

    source_crs = reaches.crs
    if source_crs is None:
        raise ValueError(f"Reach shapefile does not have a CRS: {shp_path}")

    already_wgs84 = source_crs.to_epsg() == 4326
    return reaches if already_wgs84 else reaches.to_crs(epsg=4326)

def _parse_id_list(value):
    """Convert a GQBF topology field value into a list of integer reach IDs.

    Accepts a comma-separated string (blank tokens are skipped), a Python or
    numpy integer, or an integral finite float. Anything else (None, NaN, ...)
    yields an empty list.
    """
    if isinstance(value, str):
        return [int(token) for token in (t.strip() for t in value.split(',')) if token]
    # pandas hands back numpy scalars, which are not instances of Python int.
    if isinstance(value, (int, np.integer)):
        return [int(value)]
    # Integer columns containing nulls are read as float; accept whole numbers.
    if isinstance(value, (float, np.floating)) and np.isfinite(value) and float(value).is_integer():
        return [int(value)]
    return []


def trim_GQBF(reach_gdf, filtered_gdf):
    """Reduce a river's GQBF segments to a single upstream-to-downstream chain.

    Starting from the segments that intersect the reach with ``ds_order == 1``,
    the segment with no upstream neighbour inside that reach and the largest
    ``qbf`` is chosen as the start. The chain is then extended by following
    the ``downstre_1`` IDs through ``filtered_gdf``, taking the candidate with
    the largest ``qbf`` at each step, until no downstream segment is found.

    Args:
        reach_gdf: GeoDataFrame of reach polygons with a ``ds_order`` column.
        filtered_gdf: GeoDataFrame of GQBF segments with ``reach_id``,
            ``qbf``, ``upstream_l`` and ``downstre_1`` columns.

    Returns:
        GeoDataFrame of the chained segments in walk order, carrying the CRS
        of ``filtered_gdf``; or None when no starting segment can be found.
    """
    ds_order_1_reaches = reach_gdf[reach_gdf['ds_order'] == 1]
    first_reach_segments = filtered_gdf[filtered_gdf.intersects(ds_order_1_reaches.unary_union)]
    if first_reach_segments.empty:
        return None

    # Build the ID set once so each membership test is O(1).
    ids_in_first_reach = set(first_reach_segments['reach_id'])
    is_upstream_end = np.array(
        [
            all(up_id not in ids_in_first_reach for up_id in _parse_id_list(raw))
            for raw in first_reach_segments['upstream_l']
        ],
        dtype=bool,
    )
    upstream_end_segments = first_reach_segments[is_upstream_end]
    if upstream_end_segments.empty:
        return None

    current_segment = upstream_end_segments.loc[upstream_end_segments['qbf'].idxmax()].copy()

    mainstem_segments = []
    visited_ids = set()
    while current_segment is not None:
        mainstem_segments.append(current_segment)
        visited_ids.add(current_segment['reach_id'])

        # Skip IDs already on the chain so a cyclic link cannot loop forever.
        downstream_ids = [
            rid for rid in _parse_id_list(current_segment['downstre_1'])
            if rid not in visited_ids
        ]

        downstream_segments = filtered_gdf[filtered_gdf['reach_id'].isin(downstream_ids)]
        if downstream_segments.empty:
            current_segment = None
        else:
            current_segment = downstream_segments.loc[downstream_segments['qbf'].idxmax()].copy()

    mainstem_gqbf_gdf = gpd.GeoDataFrame(mainstem_segments)
    mainstem_gqbf_gdf.crs = filtered_gdf.crs

    return mainstem_gqbf_gdf

def get_elevation(lat, lon, max_retries=3):
    """Look up the DEM elevation at a point through Earth Engine.

    Points at latitude 60 or above are sampled from the ArcticDEM 2 m mosaic
    (at scale 32); all other points are sampled from NASADEM (at scale 30).

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.
        max_retries: Maximum number of attempts. Earth Engine memory-limit and
            timeout errors, and any non-Earth-Engine exception, are retried.

    Returns:
        Tuple ``(elevation, error)``. On success ``error`` is None; on failure
        ``elevation`` is None and ``error`` is a short message. Elevations
        outside the open interval (-500, 9000) are rejected as invalid.
    """
    band = 'elevation'

    for attempt in range(max_retries):
        try:
            point = ee.Geometry.Point([lon, lat])

            if lat >= 60:
                dem = ee.Image('UMN/PGC/ArcticDEM/V3/2m_mosaic')
                scale = 32
            else:
                dem = ee.Image('NASA/NASADEM_HGT/001')
                scale = 30

            # `.first()` returns a client-side proxy that is always truthy, so
            # emptiness can only be detected after fetching the value: an
            # empty sample collection comes back as None.
            sample_info = dem.select(band).sample(point, scale).first().getInfo()

            if sample_info is None:
                return None, "No data at location"

            elevation = sample_info.get('properties', {}).get(band)
            if elevation is not None and -500 < elevation < 9000:
                return elevation, None
            return None, f"Invalid elevation: {elevation}"

        except ee.EEException as e:
            error_msg = str(e)
            retryable = (
                'User memory limit exceeded' in error_msg
                or 'Computation timed out' in error_msg
            )
            if retryable and attempt < max_retries - 1:
                # Exponential back-off before the next attempt.
                time.sleep(2 ** attempt)
                continue
            return None, f"GEE Error: {error_msg[:100]}"

        except Exception as e:
            if attempt < max_retries - 1:
                time.sleep(1)
                continue
            return None, f"Error: {str(e)}"

    return None, "Max retries exceeded"

def _sample_reach_elevations(reach_segments, total_length, n_samples=25):
    """Sample elevations at evenly spaced distances along a reach's segments.

    Returns four parallel lists covering only the samples for which an
    elevation was obtained: elevations, along-channel distances, the sampled
    point geometries, and 1-based sample numbers.
    """
    # Running distance at every segment boundary, in the row order of reach_segments.
    boundaries = [0]
    for seg_length in reach_segments['length']:
        boundaries.append(boundaries[-1] + seg_length)
    n_segments = len(reach_segments)

    elevations, distances, sample_points, sample_numbers = [], [], [], []
    for i, target_dist in enumerate(np.linspace(0, total_length, n_samples)):
        # First segment whose [start, end) interval holds the target; targets
        # at or beyond the final boundary fall back to the last segment.
        seg_idx = next(
            (j for j in range(n_segments)
             if boundaries[j] <= target_dist < boundaries[j + 1]),
            n_segments - 1,
        )

        seg = reach_segments.iloc[seg_idx]
        seg_length = seg['length']
        fraction_in_seg = (target_dist - boundaries[seg_idx]) / seg_length if seg_length > 0 else 0
        fraction_in_seg = np.clip(fraction_in_seg, 0, 1)
        point = seg.geometry.interpolate(fraction_in_seg, normalized=True)

        try:
            elev, _error = get_elevation(point.y, point.x)
        except Exception as exc:
            # Best-effort sampling: report and skip, but let KeyboardInterrupt
            # and SystemExit propagate so a long run can still be cancelled.
            print(f"  ! Elevation lookup failed for sample {i + 1}: {exc}")
            continue

        if elev is None:
            continue

        elevations.append(elev)
        distances.append(target_dist)
        sample_points.append(point)
        sample_numbers.append(i + 1)

    return elevations, distances, sample_points, sample_numbers


def _iqr_outlier_mask(values, k=2.5):
    """Return a boolean mask flagging values beyond k * IQR from the quartiles."""
    q1, q3 = np.percentile(values, [25, 75])
    iqr = q3 - q1
    return (values < q1 - k * iqr) | (values > q3 + k * iqr)


def _save_sample_map(reach_segments, sample_points, sample_numbers, is_outlier, title, out_path):
    """Plot the reach's segments with the retained sample points and save a PNG."""
    fig, ax = plt.subplots(figsize=(10, 8))
    for _, seg in reach_segments.iterrows():
        ax.plot(*seg.geometry.xy, 'b-', linewidth=2, alpha=0.5)

    for pt, pt_num, flagged in zip(sample_points, sample_numbers, is_outlier):
        if flagged:
            continue
        ax.plot(pt.x, pt.y, 'go', markersize=6, zorder=5)
        ax.text(pt.x, pt.y, f' {pt_num}', fontsize=7, ha='left', va='bottom',
                bbox=dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.7))

    ax.set_title(title, fontsize=12, fontweight='bold')
    ax.set_xlabel('Longitude')
    ax.set_ylabel('Latitude')
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    fig.savefig(out_path, dpi=200, bbox_inches='tight')
    plt.close(fig)


def _save_elevation_profile(distances, elevations, sample_numbers, is_outlier,
                            fit_distances, slope, intercept, r_squared, title, out_path):
    """Plot sampled elevations, flagged outliers and the fitted line; save a PNG."""
    fig, ax = plt.subplots(figsize=(10, 6))

    for dist, elev, num, flagged in zip(distances, elevations, sample_numbers, is_outlier):
        if flagged:
            ax.plot(dist, elev, 'rx', markersize=10, markeredgewidth=2, zorder=6)
        else:
            ax.plot(dist, elev, 'bo', markersize=8, zorder=5)
            ax.text(dist, elev, f' {num}', fontsize=8, ha='left', va='bottom')

    ax.plot(fit_distances, slope * fit_distances + intercept, 'r-', linewidth=2.5,
            label=f'Slope={slope:.6f} (R²={r_squared:.3f})', zorder=4)

    ax.set_title(title, fontsize=12, fontweight='bold')
    ax.set_xlabel('Distance along channel (m)')
    ax.set_ylabel('Elevation (m)')
    ax.legend(loc='best')
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    fig.savefig(out_path, dpi=200, bbox_inches='tight')
    plt.close(fig)


def get_slope(reach_gdf, gqbf_gdf, river_name, working_directory):
    """Estimate the channel slope of every reach from sampled DEM elevations.

    For each reach, the GQBF segments intersecting the reach polygon are
    sampled at 25 evenly spaced along-channel distances. Elevations outside
    2.5 IQR of the quartiles are discarded, a straight line is fitted to
    elevation against distance, and the absolute value of its gradient is
    stored. A sample-point map and an elevation-profile plot are written to
    ``<working_directory>/RiverMapping/HydraulicGeometry/<river_name>/Slope_regressions``
    for every reach with a computed slope.

    Args:
        reach_gdf: GeoDataFrame of reach polygons with a ``ds_order`` column.
        gqbf_gdf: GeoDataFrame of channel segments with ``length`` and
            geometry columns, or None when no segments are available.
        river_name: River identifier used in plot titles and file names.
        working_directory: Root folder containing ``RiverMapping``.

    Returns:
        Dict mapping each reach's ``ds_order`` to its slope magnitude, or to
        None when the reach has no segments, zero total length, or fewer than
        two usable elevation samples.
    """
    # Without any channel segments no reach can be sampled.
    if gqbf_gdf is None:
        return {reach['ds_order']: None for _, reach in reach_gdf.iterrows()}

    slope_dict = {}
    output_dir = os.path.join(
        working_directory, "RiverMapping", "HydraulicGeometry", river_name, "Slope_regressions"
    )

    for _, reach in reach_gdf.iterrows():
        ds_order = reach['ds_order']

        reach_segments = gqbf_gdf[gqbf_gdf.intersects(reach.geometry)].copy()
        if reach_segments.empty:
            slope_dict[ds_order] = None
            continue

        total_length = reach_segments['length'].sum()
        if total_length == 0:
            slope_dict[ds_order] = None
            continue

        elevations, distances, sample_points, sample_numbers = _sample_reach_elevations(
            reach_segments, total_length
        )
        if len(elevations) < 2:
            slope_dict[ds_order] = None
            continue

        # Outlier detection
        elevations_array = np.array(elevations)
        distances_array = np.array(distances)
        sample_numbers_array = np.array(sample_numbers)
        is_outlier = _iqr_outlier_mask(elevations_array)

        elevations_clean = elevations_array[~is_outlier]
        distances_clean = distances_array[~is_outlier]
        if len(elevations_clean) < 2:
            slope_dict[ds_order] = None
            continue

        # Sort by distance
        sort_indices = np.argsort(distances_clean)
        distances_clean = distances_clean[sort_indices]
        elevations_clean = elevations_clean[sort_indices]

        # Calculate slope; only its magnitude is reported.
        slope_channel, intercept = np.polyfit(distances_clean, elevations_clean, 1)
        r_squared = np.corrcoef(distances_clean, elevations_clean)[0, 1] ** 2
        slope_dict[ds_order] = abs(slope_channel)

        os.makedirs(output_dir, exist_ok=True)

        # Plot 1: Channel map
        _save_sample_map(
            reach_segments, sample_points, sample_numbers_array, is_outlier,
            f'{river_name} - Reach {ds_order}: Sample Points',
            os.path.join(output_dir, f'{river_name}_reach{ds_order}_map.png'),
        )

        # Plot 2: Elevation profile
        _save_elevation_profile(
            distances_array, elevations_array, sample_numbers_array, is_outlier,
            distances_clean, slope_channel, intercept, r_squared,
            f'{river_name} - Reach {ds_order}: Elevation Profile',
            os.path.join(output_dir, f'{river_name}_reach{ds_order}_profile.png'),
        )

    return slope_dict

def load_based_model(working_directory):
    """Load the BASED XGBoost booster from the working directory.

    The model file is expected at
    ``<working_directory>/Gearon_etal_2024/based-api/based_us_sans_trampush_early_stopping_combat_overfitting.ubj``.

    Args:
        working_directory: Root folder containing ``Gearon_etal_2024``.

    Returns:
        An ``xgb.Booster`` with the model file loaded.

    Raises:
        FileNotFoundError: If the model file does not exist.
    """
    relative_parts = (
        'Gearon_etal_2024',
        'based-api',
        'based_us_sans_trampush_early_stopping_combat_overfitting.ubj',
    )
    model_path = os.path.join(working_directory, *relative_parts)

    if os.path.isfile(model_path):
        booster = xgb.Booster()
        booster.load_model(model_path)
        return booster

    raise FileNotFoundError(f"BASED model file not found: {model_path}")

def predict_depth_based(model, width, slope, discharge):
    """Predict channel depth with the BASED booster, or return None.

    The model is fed one row with the features ``width``, ``slope`` (absolute
    value) and ``discharge``.

    Args:
        model: Loaded ``xgb.Booster``.
        width: Channel width; must be positive.
        slope: Channel slope; its magnitude is used and must be non-zero.
        discharge: Discharge; must be positive.

    Returns:
        The predicted depth as a float, or None when any input is missing
        (None or NaN) or out of range, when no model is supplied, when the
        prediction fails, or when the predicted depth is not positive.
    """
    inputs = (width, slope, discharge)
    if any(value is None for value in inputs):
        return None

    # NaN slips past the <= 0 comparisons below (e.g. the median of an
    # all-NaN column), so reject it explicitly before it reaches the model.
    if any(pd.isna(value) for value in inputs):
        return None

    if width <= 0 or discharge <= 0:
        return None

    slope_abs = abs(slope)
    if slope_abs == 0:
        return None

    if model is None:
        return None

    input_data = pd.DataFrame({
        'width': [width],
        'slope': [slope_abs],
        'discharge': [discharge]
    })

    dmatrix = xgb.DMatrix(input_data)

    try:
        prediction = model.predict(dmatrix)
        depth = float(prediction[0])
    except Exception as exc:
        # Depth is optional output: report the failure instead of hiding it,
        # and let KeyboardInterrupt/SystemExit propagate.
        print(f"  ! BASED depth prediction failed: {exc}")
        return None

    return depth if depth > 0 else None

def calculate_hydraulic_geom(river_name, continent_abr, working_directory):
    """Compute per-reach hydraulic geometry for one river and write it to CSV.

    For every reach, the GQBF segments intersecting the reach polygon provide
    the median ``grwl_width``, median ``qbf`` and summed ``length``. Slope
    comes from ``get_slope`` and depth from the BASED model when it can be
    loaded. The table is written to
    ``<working_directory>/RiverMapping/HydraulicGeometry/<river_name>/<river_name>_hydraulic_geometry.csv``.

    Args:
        river_name: River identifier used for input and output paths.
        continent_abr: Continent abbreviation forwarded to ``get_GQBF``.
        working_directory: Root folder containing the input data folders.

    Returns:
        None. Progress and warnings are printed.
    """
    print(f"\nProcessing {river_name}...")

    reach_gdf = get_reach(river_name, working_directory)
    gqbf_gdf = get_GQBF(river_name, reach_gdf, continent_abr, working_directory)

    if gqbf_gdf is None:
        # get_GQBF returns None when no usable segments exist; record empty
        # metrics for every reach rather than failing on a None frame.
        print(f"  ! No GQBF segments found for {river_name}; metrics will be empty")
        slope_dict = {}
    else:
        slope_dict = get_slope(reach_gdf, gqbf_gdf, river_name, working_directory)

    try:
        based_model = load_based_model(working_directory)
    except Exception as exc:
        # Depth is optional, but say why it will be missing from the output.
        print(f"  ! BASED model unavailable; depth will not be estimated: {exc}")
        based_model = None

    results = []

    for _, reach in reach_gdf.iterrows():
        ds_order = reach["ds_order"]

        if gqbf_gdf is None:
            reach_segments = None
        else:
            reach_segments = gqbf_gdf[gqbf_gdf.intersects(reach.geometry)]

        if reach_segments is not None and not reach_segments.empty:
            median_width = reach_segments["grwl_width"].median()
            median_qbf = reach_segments["qbf"].median()
            length = reach_segments["length"].sum()
        else:
            median_width = median_qbf = length = None

        slope = slope_dict.get(ds_order, None)

        depth = None
        if based_model is not None:
            depth = predict_depth_based(based_model, median_width, slope, median_qbf)

        results.append({
            "ds_order": ds_order,
            "median_width_m": median_width,
            "median_qbf_m3s": median_qbf,
            "length_m": length,
            "slope": slope,
            "BASED_depth_m": depth
        })

    df = pd.DataFrame(results)

    output_dir = os.path.join(working_directory, "RiverMapping", "HydraulicGeometry", river_name)
    os.makedirs(output_dir, exist_ok=True)

    output_csv_path = os.path.join(output_dir, f"{river_name}_hydraulic_geometry.csv")
    df.to_csv(output_csv_path, index=False)

    print(f"  ✓ Completed: {output_csv_path}")

def process_hydraulic_geom_calculator(csv_file_path):
    """Run the hydraulic geometry calculation for every river in a datasheet.

    Args:
        csv_file_path: Path to a CSV whose rows provide ``river_name``,
            ``working_directory`` and ``hydroatlas_zone`` (used as the
            continent abbreviation).

    Returns:
        None. A completion message is printed after the last river.
    """
    datasheet = pd.read_csv(csv_file_path)

    for _, record in datasheet.iterrows():
        calculate_hydraulic_geom(
            river_name=record['river_name'],
            working_directory=record['working_directory'],
            continent_abr=record['hydroatlas_zone'],
        )

    print("\n✓ All rivers processed successfully!")

## Input RivMapper .csv path and run hydraulic geometry calculator

In [23]:
# Path to the RivMapper datasheet. Each row must provide the river_name,
# working_directory and hydroatlas_zone columns read by
# process_hydraulic_geom_calculator; edit this path for your own machine.
csv_file_path = r"E:\Dissertation\Data\Bermejo_river_datasheet.csv"
process_hydraulic_geom_calculator(csv_file_path)


Processing Bermejo...
  ✓ Completed: E:\Dissertation\Data\RiverMapping\HydraulicGeometry\Bermejo\Bermejo_hydraulic_geometry.csv

✓ All rivers processed successfully!
