# Transit length (xtran) calculator

The following code combines hydraulic geometry, WBMsed sediment flux data, floodplain reworking timescales (TR), and river length geometries (produced in RivMapper and drawn from HydroATLAS) to calculate the characteristic transit length (xtran) of each reach within a given river system. The equation is a sediment budget that balances the incoming bed material flux against the volumetric rate of sediment accretion onto the floodplain.

The transit length (or length scale) equation, where the transit length is written xtran and sometimes denoted Ls, is a well-documented method for estimating sediment transit distances, and is used in:
Torres, M.A., Limaye, A.B., Ganti, V., Lamb, M.P., West, A.J., Fischer, W.W., 2017. Model predictions of long-lived storage of organic carbon in river deposits. Earth Surface Dynamics 5, 711–730. https://doi.org/10.5194/esurf-5-711-2017

Pizzuto, J., Schenk, E.R., Hupp, C.R., Gellis, A., Noe, G., Williamson, E., Karwan, D.L., O’Neal, M., Marquard, J., Aalto, R., Newbold, D., 2014. Characteristic length scales and time-averaged transport velocities of suspended sediment in the mid-Atlantic Region, USA. Water Resources Research 50, 790–805. https://doi.org/10.1002/2013WR014485

WBMsed dataset: 
Cohen, S., Syvitski, J., Ashley, T., Lammers, R., Fekete, B., Li, H.-Y., 2022. Spatial Trends and Drivers of Bedload and Suspended Sediment Fluxes in Global Rivers. Water Resources Research 58, e2021WR031583. https://doi.org/10.1029/2021WR031583

HydroATLAS: 
Linke, S., Lehner, B., Ouellet Dallaire, C., Ariwi, J., Grill, G., Anand, M., Beames, P., Burchard-Levine, V., Maxwell, S., Moidu, H., Tan, F., Thieme, M., 2019. Global hydro-environmental sub-basin and river reach characteristics at high spatial resolution. Sci Data 6, 283. https://doi.org/10.1038/s41597-019-0300-6

Author: James (Huck) Rees; PhD Student, UCSB Geography

Date: April 9, 2025

## Import packages

In [7]:
import os
import geopandas as gpd
import pandas as pd
from pyproj import CRS
import glob
import re

## Initialize functions

In [28]:
def get_real_reach_length(working_directory: str, river_name: str) -> pd.DataFrame:
    """
    Measure the planar length of every reach in a river's reach shapefile.

    The shapefile is read from
    ``<working_directory>/HydroATLAS/HydroRIVERS/Extracted_Rivers/<river_name>/<river_name>_reaches.shp``,
    reprojected to the UTM zone that contains the centroid of all reaches, and
    each reach's length is measured in that projection.

    Parameters:
        working_directory (str): Base path to the data directory.
        river_name (str): Name of the river used to locate the shapefile.

    Returns:
        pd.DataFrame: The shapefile's attribute table with the "geometry",
        "reach_len" and "ds_dist" columns removed (when present) and a new
        "real_reach_length_m" column holding each reach's length in meters.

    Raises:
        ValueError: If the shapefile contains no reach geometries.
    """
    # Construct the path to the shapefile
    shapefile_path = os.path.join(
        working_directory, "HydroATLAS", "HydroRIVERS", "Extracted_Rivers", river_name, f"{river_name}_reaches.shp"
    )

    # Load shapefile using geopandas
    gdf = gpd.read_file(shapefile_path)

    # Print the Coordinate Reference System (CRS)
    print("Original CRS:", gdf.crs)

    # Convert to WGS84 (lat/lon) so the centroid is expressed in degrees
    gdf_wgs84 = gdf.to_crs(epsg=4326)

    # Merge all reaches into one geometry. GeoPandas >= 1.0 deprecates the
    # ``unary_union`` attribute in favour of ``union_all()``; fall back to the
    # old attribute on older installations.
    reach_geometries = gdf_wgs84.geometry
    if hasattr(reach_geometries, "union_all"):
        all_reaches = reach_geometries.union_all()
    else:
        all_reaches = reach_geometries.unary_union

    # Without any geometry there is no centroid to pick a UTM zone from
    if all_reaches is None or all_reaches.is_empty:
        raise ValueError(f"No reach geometries found in '{shapefile_path}'")

    # Calculate UTM zone from centroid longitude. Zones are numbered 1-60, so
    # clamp the result: a longitude of exactly 180 would otherwise give 61.
    centroid = all_reaches.centroid
    lon = centroid.x
    utm_zone = min(int((lon + 180) / 6) + 1, 60)

    # EPSG 326xx are the northern-hemisphere WGS84 UTM zones, 327xx the southern
    epsg_code = 32600 + utm_zone if centroid.y >= 0 else 32700 + utm_zone

    print("Reprojecting to: EPSG:", epsg_code)

    # Reproject to appropriate UTM zone
    gdf = gdf.to_crs(epsg=epsg_code)

    # Compute length of each reach in meters and add as new column
    gdf["real_reach_length_m"] = gdf.geometry.length

    # Return the attribute table without the geometry and the pre-existing
    # length/distance columns; tolerate shapefiles that lack the optional ones.
    return gdf.drop(columns=["geometry", "reach_len", "ds_dist"], errors="ignore")

def _load_median_tr(tr_directory: str) -> pd.DataFrame:
    """Return one row per ``Reach_<ds_order>_TR_i.csv`` file with the median of its 'TR_i' column."""
    reach_file_pattern = re.compile(r"Reach_(\d+)_TR_i\.csv")

    # Escape the directory so glob metacharacters in the data path (e.g. "[")
    # are matched literally instead of being read as a pattern.
    tr_csv_files = glob.glob(os.path.join(glob.escape(tr_directory), "Reach_*TR_i.csv"))

    records = []
    for file_path in tr_csv_files:
        filename = os.path.basename(file_path)
        match = reach_file_pattern.match(filename)
        if not match:
            # The glob is looser than the regex; skip files without a numeric reach id
            continue

        tr_samples = pd.read_csv(file_path)
        if 'TR_i' not in tr_samples.columns:
            raise ValueError(f"File '{filename}' does not contain 'TR_i' column")

        records.append({"ds_order": int(match.group(1)), "TR_i": tr_samples['TR_i'].median()})

    if not records:
        # An empty table has no 'ds_order' column and would only fail later,
        # inside the merge, with an unhelpful KeyError.
        raise FileNotFoundError(
            f"No 'Reach_<n>_TR_i.csv' files found in '{tr_directory}'"
        )

    return pd.DataFrame(records)


def calculate_xtran(
    working_directory: str,
    river_name: str,
    real_reach_lengths: pd.DataFrame,
    bulk_density: float
) -> pd.DataFrame:
    """
    Calculate the characteristic transit length (x_tran) of each reach.

    Three per-reach tables are read from disk and joined with
    ``real_reach_lengths`` on the "ds_order" column:

    * ``RiverMapping/HydraulicGeometry/<river>/<river>_hydraulic_geometry.csv``
      (its "length_m" column is renamed to "GQBF_reach_length_m"),
    * the median "TR_i" of every
      ``RiverMapping/Mobility/<river>/TR_Distributions/Reach_<ds_order>_TR_i.csv``,
    * ``WBMsed/Extracted_Rivers/<river>_wbmsed.csv``.

    The transit length then follows from the sediment balance
    ``x_tran = sediment_flux * TR / (depth * width)``.

    Parameters:
        working_directory (str): Base path to the data directory.
        river_name (str): Name of the river.
        real_reach_lengths (pd.DataFrame): Per-reach table with at least the
            "ds_order" and "real_reach_length_m" columns.
        bulk_density (float): Bulk density of sediment in kg/m^3.

    Returns:
        pd.DataFrame: The joined table with the added columns
        "sediment_flux_m3_yr", "x_tran_m" and "n_stor".

    Raises:
        ValueError: If ``bulk_density`` is not a positive number, or a TR file
            lacks the "TR_i" column.
        FileNotFoundError: If no reach TR files are found for the river.
    """
    # A zero, negative or NaN density would silently produce inf/NaN fluxes
    if not bulk_density > 0:
        raise ValueError(f"bulk_density must be a positive number, got {bulk_density!r}")

    # Construct the path to the hydraulic geometry CSV
    hg_path = os.path.join(
        working_directory, "RiverMapping", "HydraulicGeometry", river_name, f"{river_name}_hydraulic_geometry.csv"
    )

    # Load hydraulic geometry data; rename its length column so it is not
    # confused with the measured "real_reach_length_m"
    hydraulic_geometry = pd.read_csv(hg_path)
    hydraulic_geometry = hydraulic_geometry.rename(columns={"length_m": "GQBF_reach_length_m"})

    # Load the median floodplain reworking timescale (TR) of each reach
    tr_directory = os.path.join(
        working_directory, "RiverMapping", "Mobility", river_name, "TR_Distributions"
    )
    tr_vals = _load_median_tr(tr_directory)

    # Construct the path to the WBMsed data CSV
    wbmsed_path = os.path.join(
        working_directory, "WBMsed", "Extracted_Rivers", f"{river_name}_wbmsed.csv"
    )

    # Load WBMsed data
    wbmsed = pd.read_csv(wbmsed_path)

    # Merge all DataFrames on 'ds_order'. These are inner joins, so a reach
    # missing from any one table is dropped from the result.
    merged_df = real_reach_lengths.merge(hydraulic_geometry, on="ds_order")
    merged_df = merged_df.merge(tr_vals, on="ds_order")
    merged_df = merged_df.merge(wbmsed, on="ds_order")

    # Convert sediment flux from kg/s to m^3/year using bulk density
    seconds_per_year = 365.25 * 24 * 60 * 60
    merged_df['sediment_flux_m3_yr'] = (
        (merged_df['mean_BedloadFlux_kg_s'] + merged_df['mean_SuspendedBedFlux_kg_s']) * seconds_per_year / bulk_density
    )

    # Compute x_tran in meters using sediment balance equation:
    # sediment_flux_m3_yr = x_tran * depth * width / TR
    # => x_tran = sediment_flux_m3_yr * TR / (depth * width)
    # NOTE(review): this yields meters only if TR_i is expressed in years - confirm.
    merged_df['x_tran_m'] = (
        merged_df['sediment_flux_m3_yr'] * merged_df['TR_i'] /
        (merged_df['depth_for_calcs_m'] * merged_df['median_width_m'])
    )

    # Compute n_stor as real_reach_length / x_tran
    merged_df['n_stor'] = merged_df['real_reach_length_m'] / merged_df['x_tran_m']

    return merged_df

def process_transit_lengths(csv_file):
    """
    Batch driver: compute transit lengths for every river in a datasheet and save them.

    Parameters:
        csv_file: Path to a CSV with one row per river. The columns read are
            "river_name", "working_directory" and "sediment_bulkdensity_kg_m3"
            (sediment bulk density in kg/m^3).

    Returns:
        None. For each river the result table is written to
        ``<working_directory>/RiverMapping/Mobility/<river_name>/<river_name>_transit_lengths.csv``
        and the output path is printed.
    """
    input_columns = ('river_name', 'working_directory', 'sediment_bulkdensity_kg_m3')
    datasheet = pd.read_csv(csv_file)

    # One datasheet row describes one river
    for _, river_row in datasheet.iterrows():
        river_name, working_directory, bulk_density = (
            river_row[column] for column in input_columns
        )

        # Measure reach lengths first; they feed straight into the x_tran calculation
        output_df = calculate_xtran(
            working_directory,
            river_name,
            get_real_reach_length(working_directory, river_name),
            bulk_density,
        )

        # Results live next to the river's other mobility outputs
        output_path = os.path.join(
            working_directory, "RiverMapping", "Mobility", river_name, f"{river_name}_transit_lengths.csv"
        )
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        output_df.to_csv(output_path, index=False)
        print(f"Transit data saved to: {output_path}")


In [18]:
# Scratch cell: build a per-reach table of TR_i values for the Beni river
# directly from its TR_Distributions folder (hard-coded local path).
tr_directory = os.path.join(
    r"C:\Users\huckr\Desktop\UCSB\Dissertation\Data", "RiverMapping", "Mobility", "Beni", "TR_Distributions"
)
    
# Candidate files; the regex below additionally requires a numeric reach id
tr_csv_files = glob.glob(os.path.join(tr_directory, "Reach_*TR_i.csv"))
pattern = r"Reach_(\d+)_TR_i\.csv"
    
results = []

for file_path in tr_csv_files:
    filename = os.path.basename(file_path)
    match = re.match(pattern, filename)
    if not match:
        # Matched the glob but not the "Reach_<digits>_TR_i.csv" naming; skip it
        continue
        
    # The digits in the file name are the reach's ds_order
    ds_order = int(match.group(1))
    df = pd.read_csv(file_path)

    if 'TR_i' not in df.columns:
        raise ValueError(f"File '{filename}' does not contain 'TR_i' column")
        
    # NOTE(review): the variable is named median_tr but holds the column MAXIMUM;
    # the equivalent loop used by calculate_xtran takes .median(). Confirm which
    # statistic is intended here.
    median_tr = df['TR_i'].max()
    results.append({"ds_order": ds_order, "TR_i": median_tr})

# Collect the per-reach values into a DataFrame (columns: ds_order, TR_i)
tr_vals = pd.DataFrame(results)

## Process transit lengths

In [29]:
# Run the full workflow for every river listed in the Beni datasheet
# (hard-coded local path; edit to point at your own datasheet CSV).
csv_file_path = r"C:\Users\huckr\Desktop\UCSB\Dissertation\Data\RiverMapping\Beni_river_datasheet.csv"
process_transit_lengths(csv_file_path)

Original CRS: EPSG:3395
Reprojecting to: EPSG: 32719
Transit data saved to: C:\Users\huckr\Desktop\UCSB\Dissertation\Data\RiverMapping\Mobility\Beni\Beni_transit_lengths.csv
