# centerflow

**centerflow** is a Python module for modeling glacier dynamics along flowlines. It provides tools with six core functionalities:

1. **ID finder**  
   Sometimes we need multiple IDs for a single glacier. For example, it is useful to be able to look up the RGI v6 ID that corresponds to a given RGI v7 ID, and vice versa.

2. **Mesh generation**  
   Construct a 1D finite element mesh along an RGI-defined glacier centerline.


3. **Mesh refinement**  
   Sometimes we need to limit the mesh to the extent of the available data.

4. **Data interpolation**  
   Interpolate gridded geospatial datasets (e.g., surface elevation, velocity, surface mass balance) onto the centerline mesh.

5. **Data re-interpolation**  
   Extend interpolated functions onto a longer mesh, providing a buffer for terminus advance within mesh bounds. This is not yet useful in practice, but it may become so once icepack2 is working.

6. **Bed inversion**  
   Apply a forward-model-based bed inversion scheme following the approach of [van Pelt et al. (2013)](https://tc.copernicus.org/articles/7/987/2013/), using observed surface elevations to iteratively estimate basal topography.

These tools are designed for efficient, reproducible glacier modeling along flowlines.

## Imports

In [None]:
from dataclasses import dataclass
import firedrake
import geopandas as gpd
import icepack
import numpy as np
import pandas as pd
from pyproj import Geod
from pathlib import Path
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.crs import CRS
from rasterio.io import MemoryFile
from scipy.interpolate import interp1d
from scipy.ndimage import gaussian_filter1d
from shapely.geometry import LineString
from tqdm import trange

## rgi6_from_rgi7 etc.

In [None]:
def rgi6_from_rgi7(**kwargs):
    """Find the RGI v6 glacier that best matches a given RGI v7 glacier.

    The RGI v7 outline whose ``rgi_id`` contains ``rgiid`` is intersected with
    every RGI v6 outline, and the v6 glacier with the largest overlap area is
    returned.

    Keyword Args:
        rgiid (str): RGI v7 identifier, or any literal substring of it
            (matched with ``str.contains(..., regex=False)``).
        rgi6_path: Path to the RGI v6 outlines (read with ``gpd.read_file``).
        rgi7_path: Path to the RGI v7 outlines (read with ``gpd.read_file``).
        target_crs (optional): Projected CRS in which overlap areas are
            measured. Defaults to ``"EPSG:32646"`` (UTM zone 46N), which is
            only appropriate near that zone; pass a locally valid CRS for
            glaciers elsewhere.

    Returns:
        str | None: The matching v6 ID with everything up to and including the
        first ``"-"`` removed, or ``None`` if no v6 outline overlaps.

    Raises:
        KeyError: If a required keyword argument is missing.
        IndexError: If no RGI v7 outline matches ``rgiid``.
        ValueError: If the v6 file has neither an ``RGIId`` nor an ``rgi_id``
            column.
    """
    rgiid = kwargs["rgiid"]
    rgi6_path = kwargs["rgi6_path"]
    rgi7_path = kwargs["rgi7_path"]
    target_crs = kwargs.get("target_crs", "EPSG:32646")

    # Load RGI7 outlines and get the matching geometry
    gdf7 = gpd.read_file(rgi7_path)
    matches = gdf7[gdf7["rgi_id"].str.contains(rgiid, regex=False)]
    if matches.empty:
        # IndexError is what the unguarded ``.values[0]`` used to raise; the
        # type is kept so existing callers behave the same.
        raise IndexError(f"RGI7 ID '{rgiid}' not found in {rgi7_path}")
    outline = matches.geometry.values[0]

    # Load RGI6 outlines and normalize the ID column name to "rgiid_6"
    gdf6 = gpd.read_file(rgi6_path)
    for id_column in ("RGIId", "rgi_id"):
        if id_column in gdf6.columns:
            gdf6 = gdf6[[id_column, "geometry"]].rename(columns={id_column: "rgiid_6"})
            break
    else:
        raise ValueError(f"No RGI ID column found in {rgi6_path}")

    # Project to a metric CRS so that overlap areas are in square metres
    gdf6 = gdf6.to_crs(target_crs)
    outline_gdf = gpd.GeoDataFrame(geometry=[outline], crs=gdf7.crs).to_crs(target_crs)

    # Intersect and pick the largest overlap
    inter = gpd.overlay(gdf6, outline_gdf, how="intersection")
    if inter.empty:
        return None
    inter["overlap_area_m2"] = inter.geometry.area
    best_match = inter.sort_values("overlap_area_m2", ascending=False).iloc[0]["rgiid_6"]

    # Keep only the part after the first "-" (e.g. "15.xxxxx")
    return best_match.split("-", 1)[-1]


def rgi7_from_rgi6(**kwargs):
    """Find the RGI v7 glacier that best matches a given RGI v6 glacier.

    The RGI v6 outline whose ID contains ``rgiid`` is intersected with every
    RGI v7 outline, and the v7 glacier with the largest overlap area is
    returned.

    Keyword Args:
        rgiid (str): RGI v6 identifier, or any literal substring of it
            (matched with ``str.contains(..., regex=False)``).
        rgi6_path: Path to the RGI v6 outlines (read with ``gpd.read_file``).
        rgi7_path: Path to the RGI v7 outlines (read with ``gpd.read_file``).
        target_crs (optional): Projected CRS in which overlap areas are
            measured. Defaults to ``"EPSG:32646"`` (UTM zone 46N), which is
            only appropriate near that zone; pass a locally valid CRS for
            glaciers elsewhere.

    Returns:
        str | None: The matching v7 ID with its first three ``"-"``-separated
        fields removed, or ``None`` if no v7 outline overlaps.

    Raises:
        KeyError: If a required keyword argument is missing.
        ValueError: If the v6 file has neither an ``RGIId`` nor an ``rgi_id``
            column.
        IndexError: If no RGI v6 outline matches ``rgiid``.
    """
    rgiid = kwargs["rgiid"]
    rgi6_path = kwargs["rgi6_path"]
    rgi7_path = kwargs["rgi7_path"]
    target_crs = kwargs.get("target_crs", "EPSG:32646")

    # Load RGI6 outlines and normalize the ID column name to "rgiid_6"
    gdf6 = gpd.read_file(rgi6_path)
    for id_column in ("RGIId", "rgi_id"):
        if id_column in gdf6.columns:
            gdf6 = gdf6[[id_column, "geometry"]].rename(columns={id_column: "rgiid_6"})
            break
    else:
        raise ValueError(f"No RGI ID column found in {rgi6_path}")

    matches = gdf6[gdf6["rgiid_6"].str.contains(rgiid, regex=False)]
    if matches.empty:
        # IndexError is what the unguarded ``.values[0]`` used to raise; the
        # type is kept so existing callers behave the same.
        raise IndexError(f"RGI6 ID '{rgiid}' not found in {rgi6_path}")
    outline = matches.geometry.values[0]

    # Load RGI7 outlines
    gdf7 = gpd.read_file(rgi7_path)[["rgi_id", "geometry"]]

    # Project to a metric CRS so that overlap areas are in square metres
    gdf7 = gdf7.to_crs(target_crs)
    outline_gdf = gpd.GeoDataFrame(geometry=[outline], crs=gdf6.crs).to_crs(target_crs)

    # Intersect and pick the largest overlap
    inter = gpd.overlay(gdf7, outline_gdf, how="intersection")
    if inter.empty:
        return None
    inter["overlap_area_m2"] = inter.geometry.area
    best_match = inter.sort_values("overlap_area_m2", ascending=False).iloc[0]["rgi_id"]

    # Keep only the trailing fields (e.g. "15-xxxxx")
    return "-".join(best_match.split("-")[3:])

def latlon_from_rgi7(**kwargs):
    """Return the 1-degree tile name containing a glacier's centroid.

    Keyword Args:
        rgiid (str): RGI v7 identifier, or any literal substring of it.
        rgi7_path: Path to the RGI v7 outlines (read with ``gpd.read_file``).
        metric_crs (optional): Projected CRS in which the centroid is
            computed. Defaults to ``"EPSG:32646"`` (UTM zone 46N, Bhutan
            region); pass a locally valid CRS for glaciers elsewhere.

    Returns:
        str: Tile name of the form ``"N27E090"`` (hemisphere letter plus
        zero-padded whole degrees for latitude, then longitude).

    Raises:
        KeyError: If a required keyword argument is missing.
        ValueError: If no RGI v7 outline matches ``rgiid``.
    """
    rgiid_7 = kwargs["rgiid"]
    rgi7_path = kwargs["rgi7_path"]
    metric_crs = kwargs.get("metric_crs", "EPSG:32646")  # default: Bhutan region

    # Load and filter
    gdf7 = gpd.read_file(rgi7_path)
    match = gdf7[gdf7['rgi_id'].str.contains(rgiid_7, regex=False)]
    if match.empty:
        raise ValueError(f"RGI7 ID '{rgiid_7}' not found in {rgi7_path}")

    # Reproject to a metric CRS for centroid calculation
    centroid_metric = match.to_crs(metric_crs).geometry.centroid.iloc[0]

    # Convert centroid back to geographic CRS
    centroid_geo = gpd.GeoSeries([centroid_metric], crs=metric_crs).to_crs("EPSG:4326").iloc[0]
    lat, lon = centroid_geo.y, centroid_geo.x

    # Build tile string. Tiles are assumed to be named after their south-west
    # corner (the SRTM / Copernicus DEM convention -- TODO confirm against the
    # dataset actually used), so floor the signed coordinate first: -0.5 deg
    # lies in tile "S01", not "S00". Results for N/E coordinates are unchanged.
    lat_floor = int(np.floor(lat))
    lon_floor = int(np.floor(lon))
    lat_prefix = "N" if lat_floor >= 0 else "S"
    lon_prefix = "E" if lon_floor >= 0 else "W"
    tile = f"{lat_prefix}{abs(lat_floor):02d}{lon_prefix}{abs(lon_floor):03d}"

    return tile

## centerline_mesh

This is set up for consistency with RGI 7.0. 

In [None]:
@dataclass
class IntervalMeshResult:
    """Bundle of a 1D centerline mesh and the geometry it was built from."""
    mesh: firedrake.IntervalMesh  # 1D mesh spanning [0, mesh_length]
    x: np.ndarray  # centerline vertex x coordinates (used as longitudes in the geodesic calls)
    y: np.ndarray  # centerline vertex y coordinates (used as latitudes in the geodesic calls)
    X: np.ndarray  # flattened mesh vertex coordinates, handy for plotting along the line
    glacier_length: float  # summed WGS84 geodesic length of the centerline
    mesh_length: float  # length of the interval mesh (glacier length plus any extension)
    centerline: object  # centerline geometry (shapely)
    centerline_extended: object  # centerline including any extension past the terminus
    outline: object  # glacier outline geometry

def centerline_mesh(**kwargs):
    """Build a 1D interval mesh along the longest centerline of a glacier.

    The glacier outline whose ``rgi_id`` contains ``rgiid`` is looked up, the
    longest centerline intersecting it is selected, and an interval mesh with
    ``n_cells`` cells is created whose length equals the geodesic length of
    that centerline plus ``extra_length``.

    Keyword Args:
        rgiid (str, optional): RGI v7 identifier or literal substring of it.
            Defaults to ``'15-09534'`` (Luggye glacier, Bhutan).
        centerline_path: Path to the centerlines file (read with ``gpd.read_file``).
        outline_path: Path to the outlines file (read with ``gpd.read_file``).
        extra_length (float, optional): Extra distance to mesh beyond the last
            centerline vertex, continuing along the final segment's azimuth.
            Defaults to 0. Currently not useful.
        n_cells (int): Number of mesh cells.
        metric_crs (optional): Projected CRS used only to rank centerlines by
            length. Defaults to ``'EPSG:32646'`` (UTM zone 46N).

    Returns:
        IntervalMeshResult: The mesh together with the centerline geometry.

    Raises:
        KeyError: If a required keyword argument is missing.
        IndexError: If no outline matches ``rgiid``.
        ValueError: If no centerline intersects the outline.
    """
    rgiid = kwargs.get('rgiid', '15-09534')  # default to Luggye glacier (Bhutan) if no RGI ID is supplied
    centerline_path = kwargs['centerline_path']
    outline_path = kwargs['outline_path']
    extra_length = kwargs.get('extra_length', 0)  # extra length = L means we mesh L extra meters beyond the terminus
    n_cells = kwargs['n_cells']
    metric_crs = kwargs.get('metric_crs', 'EPSG:32646')

    outlines = gpd.read_file(outline_path)
    centerlines = gpd.read_file(centerline_path)

    # Literal (non-regex) match, consistent with the other ID lookups in this file
    matches = outlines[outlines['rgi_id'].str.contains(rgiid, regex=False)]
    if matches.empty:
        # IndexError is what the unguarded ``.values[0]`` used to raise
        raise IndexError(f"RGI ID '{rgiid}' not found in {outline_path}")
    outline = matches.geometry.values[0]

    flowlines = centerlines[centerlines.intersects(outline)]  # may contain several smaller tributary flowlines
    if flowlines.empty:
        raise ValueError(f"No centerline in {centerline_path} intersects the outline of '{rgiid}'")
    centerline = flowlines.loc[flowlines.to_crs(metric_crs).length.idxmax(), 'geometry']  # so grab the longest one

    # Geodesic length of the centerline, segment by segment
    geod = Geod(ellps='WGS84')
    x, y = centerline.xy
    segment_lengths = [geod.inv(x[i], y[i], x[i + 1], y[i + 1])[2] for i in range(len(x) - 1)]
    glacier_length = np.sum(segment_lengths)
    length = glacier_length + extra_length  # for the purposes of constructing the interval mesh

    if extra_length > 0:
        # Append one extra vertex, continuing along the azimuth of the last segment
        azimuth, _, _ = geod.inv(x[-2], y[-2], x[-1], y[-1])
        x_new, y_new, _ = geod.fwd(x[-1], y[-1], azimuth, extra_length)
        x = np.append(x, x_new)
        y = np.append(y, y_new)
    centerline_extended = LineString(np.column_stack((x, y)))

    mesh = firedrake.IntervalMesh(n_cells, length)
    X = mesh.coordinates.dat.data_ro.flatten()  # x coords in array form, useful for plotting values along the line

    return IntervalMeshResult(
        mesh=mesh,
        x=np.array(x),
        y=np.array(y),
        X=X,
        glacier_length=glacier_length,
        mesh_length=float(length),
        centerline=centerline,
        centerline_extended=centerline_extended,
        outline=outline,
    )

## crop_mesh

In [None]:
def _cut(line, d):
    """Split ``line`` in two at distance ``d`` measured from its first vertex.

    Returns a two-element list ``[head, tail]``. ``head`` is ``None`` when
    ``d <= 0`` and ``tail`` is ``None`` when ``d`` reaches or exceeds the line
    length. The split point is linearly interpolated on the segment that
    contains it, using the first two coordinates of each vertex.
    """
    if d <= 0.0:
        return [None, LineString(line.coords)]
    if d >= line.length:
        return [LineString(line.coords), None]

    vertices = list(line.coords)
    travelled = 0.0
    for idx, (start, end) in enumerate(zip(vertices[:-1], vertices[1:])):
        seg_len = LineString([start, end]).length
        if travelled + seg_len >= d:
            # The split falls on this segment: interpolate its position
            frac = (d - travelled) / seg_len
            split_pt = (
                start[0] + frac * (end[0] - start[0]),
                start[1] + frac * (end[1] - start[1]),
            )
            head = LineString(vertices[:idx + 1] + [split_pt])
            tail = LineString([split_pt] + vertices[idx + 1:])
            return [head, tail]
        travelled += seg_len

    # Only reached if rounding kept the accumulated length below d
    return [LineString(vertices), None]

def _segment(line, d0, d1):
    """Return the piece of ``line`` lying between distances ``d0`` and ``d1``."""
    # Discard everything before d0 ...
    remainder = _cut(line, d0)[1]
    # ... then keep only the first (d1 - d0) of what is left.
    piece = _cut(remainder, d1 - d0)[0]
    return piece

def crop_mesh(**kwargs):
    """Crop a centerline mesh to the stretch covered by valid raster data.

    The extended centerline is reprojected into the raster's CRS and sampled
    at regular intervals; the mesh is then rebuilt (with the same number of
    cells) over the span between the first and last valid samples.

    Keyword Args:
        mesh: Result object providing ``centerline_extended`` (treated as
            EPSG:4326 coordinates), ``mesh`` and ``outline``.
        data_path: Path to a raster readable by ``rasterio.open``. Band 1 is
            sampled.

    Returns:
        An object of the same type as ``mesh`` describing the cropped
        centerline; ``glacier_length`` and ``mesh_length`` are both set to the
        geodesic length of the cropped line.

    Raises:
        KeyError: If a required keyword argument is missing.
        ValueError: If the centerline has zero length, if no sample is valid,
            or if the valid samples span no distance along the centerline.
    """
    mesh = kwargs['mesh']
    data_path = kwargs['data_path']

    target_samples = 400

    with rasterio.open(data_path) as src:
        r_crs = src.crs
        nodata = src.nodata

        # Centerline expressed in the raster's CRS
        cl_m = gpd.GeoSeries([mesh.centerline_extended], crs='EPSG:4326').to_crs(r_crs).iloc[0]

        step = cl_m.length / target_samples
        if r_crs.is_projected:
            # The 25-unit floor only makes sense in linear units; in a
            # geographic CRS (degrees) it would reduce sampling to the two
            # end points of the line.
            step = max(step, 25.0)
        if not step > 0:
            raise ValueError('Centerline has zero length in the raster CRS.')

        dists = np.arange(0.0, cl_m.length + step, step, dtype=float)
        pts = [cl_m.interpolate(float(d)) for d in dists]
        vals = np.array([v[0] for v in src.sample([(p.x, p.y) for p in pts])])

    # A sample is valid if it is neither NaN nor the declared nodata value.
    # (Comparing against a NaN nodata with != would mark everything valid.)
    valid = ~np.isnan(vals)
    if nodata is not None and not np.isnan(nodata):
        valid &= vals != nodata
    if not valid.any():
        raise ValueError('No valid data found along centerline for the provided raster.')

    first = int(np.argmax(valid))
    last = int(len(valid) - 1 - np.argmax(valid[::-1]))
    # The last sample distance may overshoot the line end; clamp to its length
    d0 = min(float(dists[first]), cl_m.length)
    d1 = min(float(dists[last]), cl_m.length)
    if d1 <= d0:
        raise ValueError('Valid raster data spans no distance along the centerline.')

    cl_m_cropped = _segment(cl_m, d0, d1)
    cl_ll = gpd.GeoSeries([cl_m_cropped], crs=r_crs).to_crs('EPSG:4326').iloc[0]

    # Geodesic length of the cropped line
    geod = Geod(ellps='WGS84')
    xs, ys = np.asarray(cl_ll.xy[0]), np.asarray(cl_ll.xy[1])
    segs = [geod.inv(xs[i], ys[i], xs[i + 1], ys[i + 1])[2] for i in range(len(xs) - 1)]
    new_len = float(np.sum(segs))

    n_cells = mesh.mesh.num_cells()
    new_mesh = firedrake.IntervalMesh(n_cells, new_len)
    X = new_mesh.coordinates.dat.data_ro.flatten()

    return type(mesh)(
        mesh=new_mesh,
        x=xs,
        y=ys,
        X=X,
        glacier_length=new_len,
        mesh_length=new_len,
        centerline=cl_ll,
        centerline_extended=cl_ll,
        outline=mesh.outline,
    )

## map_to_mesh

In [None]:
@dataclass
class InterpolateResult:
    """A function on the mesh plus the last nonzero nodal value found."""
    data: firedrake.Function  # function holding the interpolated nodal values
    last_nonzero_value: float  # last nonzero nodal value scanning from the end (0 if none; NaN for CSV input)

def map_to_mesh(**kwargs):
    """Interpolate a raster or a CSV lookup table onto the centerline mesh.

    For GeoTIFF input (``.tif`` / ``.tiff``, case-insensitive) the raster is
    sampled at the centerline vertices ``mesh.x, mesh.y`` and the samples are
    linearly interpolated, by geodesic distance along the centerline, onto the
    mesh vertices. For ``.csv`` input the column ``data_value`` is linearly
    interpolated as a function of the column ``key_value`` and evaluated at
    the nodal values of ``key_dataset``.

    Keyword Args:
        mesh: Result object providing ``x``, ``y``, ``mesh`` and
            ``glacier_length``.
        data_path: Path to the ``.tif``/``.tiff`` or ``.csv`` file.
        dimension (int, optional): Passed as the third positional argument of
            ``firedrake.FunctionSpace`` (the polynomial degree in Firedrake's
            signature). Defaults to 1.
        element (str, optional): Finite element family. Defaults to ``'CG'``.
        ice_free_value (optional): If given, value assigned to mesh vertices
            beyond ``mesh.glacier_length`` (raster input only).
        key_value (str): CSV column holding the lookup keys (CSV input only).
        data_value (str): CSV column holding the data (CSV input only).
        key_dataset: Function whose nodal values are looked up in the CSV
            table (CSV input only).
        projection (str, optional): CRS in which the raster is sampled; the
            raster is reprojected in memory if its CRS differs. Defaults to
            ``'EPSG:4326'``. NOTE(review): ``mesh.x, mesh.y`` are used as-is
            as sampling coordinates, so they presumably must be expressed in
            this CRS -- confirm before using a non-default projection.

    Returns:
        InterpolateResult: The function on the mesh and, for raster input, the
        last nonzero interpolated value (``NaN`` for CSV input).

    Raises:
        KeyError: If a required keyword argument is missing.
        ValueError: If the file extension is not supported.
    """
    mesh = kwargs['mesh']
    data_path = kwargs['data_path']
    extension = Path(data_path).suffix.lower()
    dimension = kwargs.get('dimension', 1)
    element = kwargs.get('element', 'CG')
    ice_free_value = kwargs.get('ice_free_value', None)
    key_value = kwargs.get('key_value', 'n/a')
    data_value = kwargs.get('data_value', 'n/a')
    key_dataset = kwargs.get('key_dataset', 'n/a')
    projection = kwargs.get('projection', 'EPSG:4326')

    if extension in ('.tif', '.tiff'):
        x, y = mesh.x, mesh.y

        with rasterio.open(data_path) as src:
            src_crs = src.crs
            target_crs = CRS.from_string(projection)

            if src_crs != target_crs:
                print(f'Reprojecting {data_path} from {src_crs} to {target_crs}')
                transform, width, height = calculate_default_transform(
                    src_crs, target_crs, src.width, src.height, *src.bounds)

                meta = src.meta.copy()
                meta.update({
                    'crs': target_crs,
                    'transform': transform,
                    'width': width,
                    'height': height
                })

                # Reproject every band into an in-memory dataset, then sample that
                with MemoryFile() as memfile:
                    with memfile.open(**meta) as dst:
                        for i in range(1, src.count + 1):
                            reproject(
                                source=rasterio.band(src, i),
                                destination=rasterio.band(dst, i),
                                src_transform=src.transform,
                                src_crs=src_crs,
                                dst_transform=transform,
                                dst_crs=target_crs,
                                resampling=Resampling.bilinear
                            )
                    with memfile.open() as reproj:
                        values = np.array(list(reproj.sample(zip(x, y)))).flatten()
            else:
                values = np.array(list(src.sample(zip(x, y)))).flatten()

        # Cumulative geodesic distance of each centerline vertex from the first
        geod = Geod(ellps='WGS84')
        segment_lengths = [geod.inv(x[i], y[i], x[i + 1], y[i + 1])[2] for i in range(len(x) - 1)]
        distances = np.insert(np.cumsum(segment_lengths), 0, 0)

        vertex_coords = mesh.mesh.coordinates.dat.data_ro.flatten()
        interp_vals = interp1d(distances, values, bounds_error=False, fill_value='extrapolate')(vertex_coords)

        if ice_free_value is not None:
            interp_vals[vertex_coords > mesh.glacier_length] = ice_free_value

        V = firedrake.FunctionSpace(mesh.mesh, element, dimension)
        data_function = firedrake.Function(V)
        data_function.dat.data[:] = interp_vals
        last_nonzero = next((v for v in reversed(interp_vals) if v != 0), 0)

        return InterpolateResult(data=data_function, last_nonzero_value=float(last_nonzero))

    if extension == '.csv':
        data = pd.read_csv(data_path)
        interp_func = interp1d(data[key_value], data[data_value], bounds_error=False, fill_value='extrapolate')

        vertex_keys = key_dataset.dat.data_ro
        data_on_mesh = interp_func(vertex_keys)

        V = firedrake.FunctionSpace(mesh.mesh, element, dimension)
        data_function = firedrake.Function(V)
        data_function.dat.data[:] = data_on_mesh

        return InterpolateResult(data=data_function, last_nonzero_value=np.nan)

    # Previously fell through and returned None, which only failed later at
    # the caller's first attribute access.
    raise ValueError(
        f"Unsupported file extension '{extension}' for {data_path}; expected .tif, .tiff or .csv"
    )

## extend_to_mesh

This projects functions (which have already been defined on a short mesh) to a mesh which may extend beyond the glacier front. Unsure yet if this will be useful. It will depend on if/how icepack2 is incorporated to permit the modeling of zero-thickness domains.  

In [None]:
def extend_to_mesh(**kwargs):
    """Transfer a function from one centerline mesh onto another.

    Nodal values of ``source_function`` are linearly interpolated (and
    extrapolated where necessary) by distance along the mesh onto the vertices
    of ``target_mesh``.

    Keyword Args:
        source_function: Function defined on ``source_mesh``.
        source_mesh: Result object providing ``mesh`` and ``glacier_length``.
        target_mesh: Result object providing ``mesh``.
        ice_free_value (optional): If given, value assigned to target vertices
            farther along than ``source_mesh.glacier_length``.

    Returns:
        InterpolateResult: The new function on the target mesh and its last
        nonzero nodal value (0 if every value is zero).
    """
    def _arc_length(points):
        # Cumulative distance of every vertex from the first one (starts at 0)
        steps = [np.linalg.norm(nxt - cur) for cur, nxt in zip(points[:-1], points[1:])]
        return np.insert(np.cumsum(steps), 0, 0)

    source_function = kwargs['source_function']
    source_mesh = kwargs['source_mesh']
    target_mesh = kwargs['target_mesh']
    ice_free_value = kwargs.get('ice_free_value', None)

    # Distances and nodal values on the source mesh
    src_points = source_mesh.mesh.coordinates.dat.data_ro[:]
    src_dist = _arc_length(src_points)
    src_vals = source_function.dat.data_ro[:]
    assert len(src_dist) == len(src_vals), "Mismatch in source distance and value lengths"

    # Distances on the target mesh
    tgt_points = target_mesh.mesh.coordinates.dat.data_ro[:]
    tgt_dist = _arc_length(tgt_points)

    # Interpolate, then overwrite the ice-free stretch if requested
    to_target = interp1d(src_dist, src_vals, bounds_error=False, fill_value='extrapolate')
    tgt_vals = to_target(tgt_dist)
    if ice_free_value is not None:
        beyond_front = tgt_dist > source_mesh.glacier_length
        tgt_vals[beyond_front] = ice_free_value

    # Same element as the source function, but on the target mesh
    element = source_function.function_space().ufl_element()
    extended = firedrake.Function(firedrake.FunctionSpace(target_mesh.mesh, element))
    extended.dat.data[:] = tgt_vals

    # Scan from the far end for the first value that is not zero
    last_nonzero = 0
    for candidate in tgt_vals[::-1]:
        if candidate != 0:
            last_nonzero = candidate
            break

    return InterpolateResult(data=extended, last_nonzero_value=float(last_nonzero))


## smooth_function

Apply a Gaussian filter to the nodal values of a function. Sometimes this is useful, e.g. for smoothing noisy interpolated data.

In [None]:
def smooth_function(**kwargs):
    """Return a Gaussian-smoothed copy of a function on the centerline mesh.

    Keyword Args:
        function: Function to smooth; it is not modified.
        mesh: Result object providing ``mesh`` (used to get the mean vertex
            spacing).
        sigma (float, optional): Standard deviation of the Gaussian kernel, in
            units of mesh vertices. Takes precedence over ``window``.
        window (float, optional): Smoothing width in mesh coordinate units;
            converted to ``sigma = window / dx / 2``. ``window_meters`` is
            accepted as an alias.

    Returns:
        A deep copy of ``function`` whose nodal values have been filtered with
        ``scipy.ndimage.gaussian_filter1d``.

    Raises:
        KeyError: If ``function`` or ``mesh`` is missing.
        ValueError: If neither ``sigma`` nor ``window`` is given.
    """
    f = kwargs['function']
    mesh = kwargs['mesh']
    sigma = kwargs.get('sigma', None)
    window_meters = kwargs.get('window', None)
    if window_meters is None:
        # The old error message advertised this name, so honour it as an alias
        window_meters = kwargs.get('window_meters', None)

    f_data = f.dat.data_ro.copy()

    # Compute dx (mean vertex spacing) from the base mesh
    coords = mesh.mesh.coordinates.dat.data_ro.flatten()
    dx = np.mean(np.diff(coords))

    if sigma is None:
        if window_meters is None:
            raise ValueError('Must specify either "sigma" or "window"')
        sigma = window_meters / dx / 2.0  # Approximate: 2σ ≈ full width at half maximum

    smoothed_data = gaussian_filter1d(f_data, sigma=sigma)

    f_smoothed = f.copy(deepcopy=True)
    f_smoothed.dat.data[:] = smoothed_data

    return f_smoothed

## solve_bed

This process requires two surface elevation observations, separated by a known amount of time, with known SMB between image dates. 

The initial glacier configuration sets the first surface elevation above a guess bed elevation. We then forward model until the date of the second observation, and compare the resulting model surface with the final reference surface. The bed is adjusted according to the misfit, and the process begins again, with the updated bed guess in place of the initial guess. This continues for a specified number of iterations. 

If only one surface observation is available, then the solver will operate under the assumption that the glacier is in steady state, so that the second reference surface is identical to the first. This assumption can be made explicitly (setting ```surface``` and ```surface_2``` to the same function) or implicitly (defining only ```surface```). 

In [None]:
@dataclass
class InversionResult:
    """Outcome of the iterative bed inversion.

    All fields are required (none has a default), so every one must be
    supplied when constructing an instance.
    """
    bed: firedrake.Function  # final bed elevation estimate
    misfits: list  # one mean surface misfit per completed iteration
    bed_evolution: list  # nodal bed values: initial guess, then one entry per completed iteration
    surface_evolution: list  # nodal modeled-surface values, one entry per completed iteration
    velocity_evolution: list  # nodal modeled-velocity values, one entry per completed iteration
    thickness_evolution: list  # nodal modeled-thickness values, one entry per completed iteration
    s_ref: firedrake.Function  # reference (target) surface the model is compared against
    surface: firedrake.Function  # modeled surface at the end of the last forward run
    velocity: firedrake.Function  # modeled velocity at the end of the last forward run

def solve_bed(**kwargs):
    """Iteratively estimate bed elevation from surface observations.

    Each iteration starts from the initial surface on top of the current bed
    guess, runs the forward model for ``num_years`` in steps of ``Δt``, and
    compares the modeled surface with the reference surface. The bed is then
    shifted by ``-K * (modeled surface - reference surface)``.

    Keyword Args:
        K: Relaxation factor multiplying the surface misfit.
        num_iterations (int): Number of inversion iterations.
        surface: Initial surface (first observation).
        surface_2 (optional): Final reference surface (second observation).
            Defaults to ``surface``, i.e. a steady-state assumption.
        thickness_guess: Initial thickness guess; the first bed guess is
            ``surface - thickness_guess``.
        velocity: Initial velocity passed to the first diagnostic solve of
            every iteration.
        accumulation: Either a mapping indexed by time
            (``surface.year + step * Δt``) or a single value/function used for
            every step.
        model: Required for interface compatibility; not used by this function.
        solver: Object providing ``diagnostic_solve`` and ``prognostic_solve``
            (presumably an icepack flow solver -- confirm with caller).
        mesh: Result object providing ``glacier_length``.
        fluidity: Fluidity passed to the diagnostic solve.
        model_time (optional): Modeled duration; required when the surfaces
            do not both carry a ``year`` attribute.
        timestep (optional): Δt; required when it cannot be inferred from the
            spacing of the first two entries of ``accumulation``.

    Returns:
        InversionResult: Final bed, per-iteration histories, and the modeled
        surface and velocity from the last forward run. If a solve fails, the
        results accumulated so far are returned instead of raising.

    Raises:
        KeyError: If a required keyword argument is missing.
    """
    K = kwargs['K']
    num_iterations = kwargs['num_iterations']  # number of iterations for the bed inversion
    s_init = kwargs['surface']  # initial surface (first observation)
    s_ref = kwargs.get('surface_2', s_init)  # final surface (second observation)
    H_guess = kwargs['thickness_guess']  # initial thickness guess
    u_guess = kwargs['velocity']  # velocity boundary condition
    a = kwargs['accumulation']  # SMB: time-indexed mapping, OR a single function for all steps
    model = kwargs['model']  # required kwarg, currently unused
    solver = kwargs['solver']
    mesh = kwargs['mesh']
    A = kwargs['fluidity']

    # `except Exception` rather than a bare `except`, so that
    # KeyboardInterrupt / SystemExit are never swallowed by these fallbacks.
    try:
        num_years = s_ref.year - s_init.year  # extract the time diff from the DEMs, if applicable
    except Exception:
        num_years = kwargs['model_time']  # otherwise, need to choose how long to model for

    try:
        smb_keys = list(a)
        Δt = round(smb_keys[1] - smb_keys[0], 10)  # extract Δt from the SMB entries, if applicable
    except Exception:
        Δt = kwargs['timestep']  # otherwise, it needs to be specified

    Q = s_ref.function_space()

    bed_guess = firedrake.Function(Q).project(s_init - H_guess)

    misfits = []
    bed_evolution = [np.array(bed_guess.dat.data_ro.copy())]
    surface_evolution = []
    velocity_evolution = []
    thickness_evolution = []

    num_timesteps = int(num_years / Δt)

    bed_correction = firedrake.Function(Q)
    surface_misfit = firedrake.Function(Q)

    # Defined up front so the final return is valid even with zero iterations
    s_mod = s_init.copy(deepcopy=True)
    u_mod = u_guess.copy(deepcopy=True)

    for iteration in trange(num_iterations):
        # Restart the forward run from the initial surface on the current bed
        bed_mod = bed_guess.copy(deepcopy=True)
        H_mod = firedrake.Function(Q).project(s_init - bed_mod)
        H_0 = H_mod.copy(deepcopy=True)
        u_mod = u_guess.copy(deepcopy=True)
        s_mod = s_init.copy(deepcopy=True)

        for step in range(num_timesteps):

            try:
                accumulation = a[s_init.year + step*Δt]  # if SMB is indexed by date
            except Exception:
                accumulation = a  # otherwise

            try:
                u_mod = solver.diagnostic_solve(
                    velocity=u_mod, thickness=H_mod, surface=s_mod, fluidity=A
                )
                H_mod = solver.prognostic_solve(
                    Δt, thickness=H_mod, velocity=u_mod,
                    thickness_inflow=H_0, accumulation=accumulation
                )
                s_mod.project(bed_mod + H_mod)

            except Exception as err:
                print(f'Bed solver failed after {iteration} iterations')
                print(f'Cause: {err!r}')

                # Return what we have so far. `surface` and `velocity` are
                # required fields; omitting them raised TypeError here.
                return InversionResult(
                    bed=bed_guess,
                    misfits=misfits,
                    bed_evolution=bed_evolution,
                    surface_evolution=surface_evolution,
                    velocity_evolution=velocity_evolution,
                    thickness_evolution=thickness_evolution,
                    s_ref=s_ref,
                    surface=s_mod,
                    velocity=u_mod,
                )

        # Shift the bed against the surface misfit
        surface_misfit.project(s_mod - s_ref)
        bed_correction.project(-K*surface_misfit)
        bed_guess.project(bed_mod + bed_correction)

        # Mean (signed) misfit: integral over the mesh divided by glacier length
        misfits.append(float(firedrake.assemble(surface_misfit*firedrake.dx)/mesh.glacier_length))
        bed_evolution.append(np.array(bed_guess.dat.data_ro.copy()))
        surface_evolution.append(np.array(s_mod.dat.data_ro.copy()))
        velocity_evolution.append(np.array(u_mod.dat.data_ro.copy()))
        thickness_evolution.append(H_mod.dat.data_ro.copy())

    return InversionResult(
        bed=bed_guess,
        misfits=misfits,
        bed_evolution=bed_evolution,
        surface_evolution=surface_evolution,
        velocity_evolution=velocity_evolution,
        thickness_evolution=thickness_evolution,
        s_ref=s_ref,
        surface=s_mod,
        velocity=u_mod,
    )

In [None]:
# !jupyter nbconvert --to script centerflow.ipynb