
# Prepare glacier outlines and centerlines for submission to GLIMS
#### Author: Wilson Cheung, May 2025
 

In [16]:
import geopandas as gpd
from pathlib import Path
from shapely.geometry import MultiPolygon
from shapely.ops import unary_union, orient
from shapely.validation import make_valid

In [None]:
# USER INPUT — edit the values in this cell, then run the notebook top to bottom
glacier_name = "Turner"  # glacier name; doubles as file-name prefix and output sub-folder

# Folders holding the input shapefiles
base_dir = Path("dummy_data")    # glacier outlines
center_dir = Path("dummy_data")  # centerlines

# Output folder, one sub-folder per glacier (created here if it does not exist yet)
out_dir = Path("dummy_output") / glacier_name
out_dir.mkdir(parents=True, exist_ok=True)

# Input files, named <glacier_name><year>[_Centerlines_smooth].shp
outline58_fp = base_dir / f"{glacier_name}1958.shp"
outline23_fp = base_dir / f"{glacier_name}2023.shp"
cl58_fp = center_dir / f"{glacier_name}1958_Centerlines_smooth.shp"
cl23_fp = center_dir / f"{glacier_name}2023_Centerlines_smooth.shp"

# Source-image metadata that is copied onto every feature of the matching layer
meta1958 = {
    "orig_id": "A16817",
    "acq_time": "1958-09-06",
    "inst_name": "Historical Aerial Photo from  National Air Photo Library (NAPL)",
}
meta2023 = {
    "orig_id": "20230817_154317_42_2424",
    "acq_time": "2023-08-17",
    "inst_name": "PlanetScope Satellite Imagery",
}

# Analyst / method metadata shared by both outline layers
analysis_info = {
    "analyst_name": "Wilson (Wai Yin) Cheung",
    "digitize_date": "2023-09-20",
    "method_desc": "Manual digitization of glacier outlines using ArcGIS Pro, followed the guidelines established by Paul et al. (2017)",
}

# Positional uncertainties written to the loc_unc_x/y and glob_unc_x/y columns.
# The local values are in metres; the unit of the global values is not stated here.
# NOTE(review): UNC58_GLOB (0.05) and UNC23_GLOB (7) differ by two orders of
# magnitude — confirm that both are expressed in the same unit.
UNC58_LOC = 2      # 1958 aerial photo, metres
UNC58_GLOB = 0.05

UNC23_LOC = 1.5    # 2023 PlanetScope scene, metres
UNC23_GLOB = 7

In [None]:
def read_vector(path):
    """Load a vector file with geopandas and insist that it declares a CRS.

    Parameters
    ----------
    path : path-like
        File handed to ``gpd.read_file``.

    Returns
    -------
    GeoDataFrame
        The layer exactly as read.

    Raises
    ------
    ValueError
        If the layer that was read has no CRS (``crs is None``).
    """
    layer = gpd.read_file(path)
    if layer.crs is not None:
        return layer
    raise ValueError(f"{path} has no CRS defined.")

def fix_polygons(gdf):                            # validity + CCW
    """Return a copy of ``gdf`` with valid, counter-clockwise polygons.

    For every geometry the function

    1. repairs it with ``make_valid`` when it is invalid,
    2. keeps only the polygonal pieces (``make_valid`` may hand back a
       GeometryCollection that also holds lines or points),
    3. dissolves those pieces with ``unary_union``, and
    4. orients the result with ``orient(..., sign=1.0)``.

    Orientation is applied *after* the union on purpose: ``unary_union``
    builds new geometries and does not promise to keep the ring winding of
    its inputs, so orienting beforehand could be undone.

    Missing (``None``) and empty geometries, and geometries without any
    polygonal part, are passed through instead of raising.

    Parameters
    ----------
    gdf : GeoDataFrame
        Layer to repair; it is not modified.

    Returns
    -------
    GeoDataFrame
        Copy of ``gdf`` with the repaired geometry column.
    """
    def _polygons_in(geom):
        """Yield every Polygon inside ``geom``, skipping lines and points."""
        if geom.geom_type == "Polygon":
            yield geom
        elif hasattr(geom, "geoms"):              # multi-part or collection
            for member in geom.geoms:
                yield from _polygons_in(member)

    def _ccw(geom):
        """Orient a Polygon / MultiPolygon with ``sign=1.0``; leave others alone."""
        if geom.geom_type == "MultiPolygon":
            return MultiPolygon([orient(part, sign=1.0) for part in geom.geoms])
        if geom.geom_type == "Polygon":
            return orient(geom, sign=1.0)
        return geom

    repaired = []
    for geom in gdf.geometry:
        if geom is None or geom.is_empty:
            repaired.append(geom)                 # nothing to repair or orient
            continue
        if not geom.is_valid:
            geom = make_valid(geom)
        polygons = list(_polygons_in(geom))
        if not polygons:
            repaired.append(geom)                 # no polygonal part: keep as is
            continue
        # union first, orient last (see docstring)
        repaired.append(_ccw(unary_union(polygons)))

    fixed = gdf.copy()
    fixed.geometry = repaired
    return fixed

def to_wgs84(gdf):
    """Return ``gdf`` in EPSG:4326, reprojecting only when necessary.

    Parameters
    ----------
    gdf : GeoDataFrame
        Layer with a defined CRS.

    Returns
    -------
    GeoDataFrame
        ``gdf`` itself (not a copy) when its CRS already resolves to
        EPSG:4326, otherwise the result of ``gdf.to_crs(4326)``.

    Raises
    ------
    ValueError
        If ``gdf`` has no CRS; without this check the lookup below would
        fail with an unhelpful ``AttributeError`` on ``None``.
    """
    if gdf.crs is None:
        raise ValueError("Cannot reproject to EPSG:4326: layer has no CRS defined.")
    if gdf.crs.to_epsg() == 4326:
        return gdf
    # also reached when to_epsg() returns None (no EPSG code matched)
    return gdf.to_crs(4326)

def _fill(gdf, col, value):
    """Set column ``col`` of ``gdf`` in place, never overwriting real values.

    A missing column is created and filled with ``value``; an existing
    column only has its NaN entries replaced by ``value``. Returns ``None``.
    """
    already_there = col in gdf.columns
    gdf[col] = gdf[col].fillna(value) if already_there else value

def add_attrs(gdf, meta, loc_unc_val, glob_unc_val):
    """Build the outline attribute table: geometry plus metadata columns.

    All columns of ``gdf`` except ``geometry`` are dropped, then the
    following columns are added in this order: ``loc_unc_x``, ``loc_unc_y``,
    ``glob_unc_x``, ``glob_unc_y``, ``glacier_name``, ``orig_id``,
    ``acq_time``, ``inst_name``, ``analyst``, ``digitize_date``, ``method``.

    Parameters
    ----------
    gdf : GeoDataFrame
        Outline layer; it is not modified.
    meta : dict
        Must provide ``orig_id``, ``acq_time`` and ``inst_name``.
    loc_unc_val, glob_unc_val
        Values stored in the local / global uncertainty columns.

    Returns
    -------
    GeoDataFrame
        New frame with the columns listed above.

    Notes
    -----
    Reads the module-level ``glacier_name`` and ``analysis_info``.
    """
    out = gdf[['geometry']].copy()

    uncertainties = {
        "loc_unc_x": loc_unc_val,
        "loc_unc_y": loc_unc_val,
        "glob_unc_x": glob_unc_val,
        "glob_unc_y": glob_unc_val,
    }
    for column, value in uncertainties.items():
        _fill(out, column, value)

    # same value for every feature of the layer
    constants = {
        "glacier_name": glacier_name,
        "orig_id": meta["orig_id"],
        "acq_time": meta["acq_time"],
        "inst_name": meta["inst_name"],
        "analyst": analysis_info["analyst_name"],
        "digitize_date": analysis_info["digitize_date"],
        "method": analysis_info["method_desc"],
    }
    for column, value in constants.items():
        out[column] = value
    return out

def prep_centerline(path, meta):
    """Read a centerline layer and reduce it to geometry plus metadata.

    The file is read with ``read_vector``, passed through ``to_wgs84`` and
    stripped of every column except ``geometry``. The columns
    ``glacier_name``, ``line_type``, ``inst_name``, ``author`` and
    ``method`` are then added in that order.

    Parameters
    ----------
    path : path-like
        Centerline file to read.
    meta : dict
        Only ``meta["inst_name"]`` is used.

    Returns
    -------
    GeoDataFrame
        New frame with the columns listed above.

    Notes
    -----
    Reads the module-level ``glacier_name``; author and method are fixed
    strings.
    """
    source = read_vector(path)
    reprojected = to_wgs84(source)
    lines = reprojected[['geometry']].copy()

    labels = {
        "glacier_name": glacier_name,
        "line_type": "centerline",
        "inst_name": meta["inst_name"],
        "author": "Wilson (Wai Yin) Cheung",
        "method": "The Open Global Glacier Model (OGGM) v1.6",
    }
    for column, value in labels.items():
        lines[column] = value
    return lines

In [19]:
# Load the four input layers and list the attribute columns each one carries
gdf_outline58 = read_vector(outline58_fp)
print("Attributes in outline58_fp:", gdf_outline58.columns.tolist(), sep="\n")

gdf_outline23 = read_vector(outline23_fp)
print("\nAttributes in outline23_fp:", gdf_outline23.columns.tolist(), sep="\n")

gdf_cl58 = read_vector(cl58_fp)
print("\nAttributes in cl58_fp:", gdf_cl58.columns.tolist(), sep="\n")

gdf_cl23 = read_vector(cl23_fp)
print("\nAttributes in cl23_fp:", gdf_cl23.columns.tolist(), sep="\n")

Attributes in outline58_fp:
['Shape_Leng', 'Shape_Area', 'geometry']

Attributes in outline23_fp:
['Shape_Leng', 'Shape_Area', 'geometry']

Attributes in cl58_fp:
['RGIID', 'SEGMENT_ID', 'STRAHLER', 'OUTFLOW_ID', 'LE_SEGMENT', 'MAIN', 'geometry']

Attributes in cl23_fp:
['RGIID', 'SEGMENT_ID', 'STRAHLER', 'OUTFLOW_ID', 'LE_SEGMENT', 'MAIN', 'geometry']


In [None]:
# Visualize outlines and centerlines.
# The outlines are drawn from their boundaries (line geometries) rather than as
# unfilled polygons: geopandas renders polygons as a PatchCollection, and
# Matplotlib releases without a legend handler for that artist silently drop
# the outline labels from the legend. Lines get a legend entry everywhere.
ax = gdf_outline58.boundary.plot(color='blue', figsize=(8, 8), label='1958 outline')
gdf_outline23.boundary.plot(ax=ax, color='red', label='2023 outline')
gdf_cl58.plot(ax=ax, color='cyan', label='1958 centerline')
gdf_cl23.plot(ax=ax, color='magenta', label='2023 centerline')
ax.legend()
ax.set_title('Dummy glacier data');  # trailing ';' hides the Text(...) repr

In [20]:
# PROCESS: repair -> reproject to EPSG:4326 -> attach attributes, one stage per line
print("Loading input layers …")
gdf58_raw = read_vector(outline58_fp)
gdf23_raw = read_vector(outline23_fp)

print("Repairing geometry & reprojecting …")
# 1958 outline
gdf58_fixed = fix_polygons(gdf58_raw)
gdf58_wgs84 = to_wgs84(gdf58_fixed)
gdf58 = add_attrs(
    gdf58_wgs84,
    meta1958,
    loc_unc_val=UNC58_LOC,
    glob_unc_val=UNC58_GLOB,
)

# 2023 outline
gdf23_fixed = fix_polygons(gdf23_raw)
gdf23_wgs84 = to_wgs84(gdf23_fixed)
gdf23 = add_attrs(
    gdf23_wgs84,
    meta2023,
    loc_unc_val=UNC23_LOC,
    glob_unc_val=UNC23_GLOB,
)

# centerlines are read, reprojected and labelled in one step
cl58 = prep_centerline(cl58_fp, meta1958)
cl23 = prep_centerline(cl23_fp, meta2023)

Loading input layers …
Repairing geometry & reprojecting …


In [21]:
# EXPORT: write the two outline layers and the two centerline layers as shapefiles
out58_fp = out_dir / f"{glacier_name}1958.shp"
out23_fp = out_dir / f"{glacier_name}2023.shp"
cl58_out = out_dir / f"{glacier_name}1958_centerline.shp"
cl23_out = out_dir / f"{glacier_name}2023_centerline.shp"

# Note: the written files carry 'glacier_na' and 'digitize_d' in place of
# 'glacier_name' and 'digitize_date' (see the column listing of the outputs).
for layer, target in (
    (gdf58, out58_fp),
    (gdf23, out23_fp),
    (cl58, cl58_out),
    (cl23, cl23_out),
):
    layer.to_file(target)
    print("✔ wrote", target)

print("\nAll four layers are GLIMS-ready and stored in", out_dir)

✔ wrote D:\outputforGLIMS\Fork\Fork1959.shp
✔ wrote D:\outputforGLIMS\Fork\Fork2023.shp
✔ wrote D:\outputforGLIMS\Fork\Fork1959_centerline.shp
✔ wrote D:\outputforGLIMS\Fork\Fork2023_centerline.shp

All four layers are GLIMS-ready and stored in D:\outputforGLIMS\Fork


  gdf58.to_file(out58_fp); print("✔ wrote", out58_fp)
  gdf23.to_file(out23_fp); print("✔ wrote", out23_fp)
  cl58 .to_file(cl58_out); print("✔ wrote", cl58_out)
  cl23 .to_file(cl23_out); print("✔ wrote", cl23_out)


In [22]:
# Read the exported shapefiles back and list the columns that were actually written
gdf_out_1958 = gpd.read_file(out58_fp)
gdf_out_2023 = gpd.read_file(out23_fp)
gdf_cl_out_1958 = gpd.read_file(cl58_out)
gdf_cl_out_2023 = gpd.read_file(cl23_out)

written = (
    ("1958 outline", gdf_out_1958),
    ("2023 outline", gdf_out_2023),
    ("1958 centerline", gdf_cl_out_1958),
    ("2023 centerline", gdf_cl_out_2023),
)
for position, (label, layer) in enumerate(written):
    lead = "\n" if position else ""   # blank line between reports, none before the first
    print(f"{lead}Columns for the {label} output:")
    print(layer.columns.tolist())

Columns for the 1958 outline output:
['loc_unc_x', 'loc_unc_y', 'glob_unc_x', 'glob_unc_y', 'glacier_na', 'orig_id', 'acq_time', 'inst_name', 'analyst', 'digitize_d', 'method', 'geometry']

Columns for the 2023 outline output:
['loc_unc_x', 'loc_unc_y', 'glob_unc_x', 'glob_unc_y', 'glacier_na', 'orig_id', 'acq_time', 'inst_name', 'analyst', 'digitize_d', 'method', 'geometry']

Columns for the 1958 centerline output:
['glacier_na', 'line_type', 'inst_name', 'author', 'method', 'geometry']

Columns for the 2023 centerline output:
['glacier_na', 'line_type', 'inst_name', 'author', 'method', 'geometry']
