# Global Healthy and Sustainable City Indicators (GHSCI)

## Cycling Indicators

This notebook prepares the inputs required to calculate cycling indicators. It builds a multimodal transport network (walking and cycling), generates sample points that represent origins, and compiles destination datasets covering public transport stops, public open space, and fresh-food markets. Details of the OSM tags and destination categories are provided in the [OSM destination definitions](https://github.com/healthysustainablecities/global-indicators/blob/main/process/configuration/templates/osm_destination_definitions.csv) file.


Step 1: Create the configuration file

Use the GHSCI software to create the intended city’s configuration file from the template.

In [None]:
import ghsci 
# Study-region identifier; later cells reuse it for the Region lookup and for
# building output folder and file names.
codename = "Maribyrnong" 
# Create this region's configuration file from the GHSCI template.
ghsci.configure(codename)

Step 2: Adjust the configuration file

Update the configuration so it is appropriate for the intended city. For example, set the correct paths to the OSM data and population datasets.


Step 3: Run the analysis

Load the intended city’s region, then run the analysis function to generate the required datasets.

In [None]:
# Load the study region identified by `codename` (set in the Step 1 cell).
r = ghsci.Region(codename) 
# Run the GHSCI analysis for the region; Step 4 below reads the resulting
# tables back from Postgres.
r.analysis()

Step 4: Save the datasets

Export the generated datasets for use in the cycling indicator generation workflow. The code retrieves the active travel network, origins and destinations from Postgres and writes them to GeoPackages:
1. `{codename}_network.gpkg`
2. `{codename}_origins.gpkg`
3. `{codename}_destinations.gpkg`

It then copies these files to the local environment, making them available for downstream processes and analysis.


In [None]:
import os, re
import pandas as pd
import geopandas as gpd
from sqlalchemy import create_engine, text

# ========= Config =========
# SQLAlchemy connection URL for the Postgres database queried by the helpers below.
# NOTE(review): user, password, host, port and database name ("maribyrnong") are
# hard-coded here and are not derived from `codename` — confirm they match the
# database created for the region being exported.
PG_URL = "postgresql+psycopg2://postgres:ghscic@gateway.docker.internal:5433/maribyrnong"
# Shared engine; every helper opens a short-lived connection from it.
ENGINE = create_engine(PG_URL)

# Per-region output folder (`codename` is set in the Step 1 cell); created if absent.
OUT_DIR = f"/data/_study_region_outputs/{codename}"
os.makedirs(OUT_DIR, exist_ok=True)

# One GeoPackage per dataset group, each prefixed with the region codename.
NETWORK_GPKG = os.path.join(OUT_DIR, f"{codename}_network.gpkg")
ORIG_GPKG    = os.path.join(OUT_DIR, f"{codename}_origins.gpkg")
DEST_GPKG    = os.path.join(OUT_DIR, f"{codename}_destinations.gpkg")

# Delete GeoPackages left by a previous run. save_layer() appends to a file that
# already exists, so stale files would otherwise keep receiving more data.
for p in (NETWORK_GPKG, ORIG_GPKG, DEST_GPKG):
    if os.path.exists(p):
        os.remove(p)

# ========= Helpers =========
def list_spatial_tables(schema="public"):
    """Return the entries of ``public.geometry_columns`` that belong to *schema*.

    The result is a pandas DataFrame with the columns ``schema``, ``name`` and
    ``geom_col``: one row per distinct (table, geometry column) pair, ordered
    by table name. A table with several registered geometry columns therefore
    appears once per geometry column.
    """
    query = text("""
        SELECT f_table_schema AS schema, f_table_name AS name, f_geometry_column AS geom_col
        FROM public.geometry_columns
        WHERE f_table_schema = :s
        GROUP BY f_table_schema, f_table_name, f_geometry_column
        ORDER BY f_table_name
    """)
    bind_values = {"s": schema}
    with ENGINE.connect() as conn:
        tables = pd.read_sql_query(query, conn, params=bind_values)
    return tables

def has_column(schema, table, col):
    """Return True if ``information_schema.columns`` lists *col* for *schema*.*table*."""
    probe = text("""
        SELECT 1
        FROM information_schema.columns
        WHERE table_schema=:s AND table_name=:t AND column_name=:c
        LIMIT 1
    """)
    lookup = {"s": schema, "t": table, "c": col}
    with ENGINE.connect() as conn:
        hits = pd.read_sql_query(probe, conn, params=lookup)
    # The query yields at most one row; any row means the column exists.
    return len(hits) > 0

def non_geom_cols(schema, table, geom_cols=("geom", "geom_4326")):
    """List the double-quoted names of a table's columns, minus *geom_cols*.

    Column names come from ``information_schema.columns`` in ordinal order.
    Each name is wrapped in double quotes so it can be placed directly in a
    SELECT list.
    """
    query = text("""
        SELECT column_name
        FROM information_schema.columns
        WHERE table_schema=:s AND table_name=:t
          AND column_name <> ALL(:gcols)
        ORDER BY ordinal_position
    """)
    # The excluded names are bound as a list for the ``<> ALL(...)`` comparison.
    bind_values = {"s": schema, "t": table, "gcols": list(geom_cols)}
    with ENGINE.connect() as conn:
        frame = pd.read_sql_query(query, conn, params=bind_values)

    quoted = []
    for column_name in frame["column_name"].tolist():
        quoted.append('"{}"'.format(column_name))
    return quoted

def read_table(schema, table):
    """Load a spatial table as a GeoDataFrame whose geometry is in EPSG:4326.

    If the table has a ``geom_4326`` column it is read as-is; otherwise the
    ``geom`` column is reprojected in the database with ``ST_Transform``.
    Either way the geometry column of the result is named ``geom_4326``.

    Columns that are not integer, float, bool or string typed are converted to
    text so they survive the later write to GeoPackage. Missing values in those
    columns (``None``, ``NaN``, ``NaT``) stay missing instead of becoming the
    literal strings "nan" or "NaT".

    Args:
        schema: Database schema containing the table.
        table: Table name.

    Returns:
        GeoDataFrame with all non-geometry columns plus ``geom_4326``, with
        its CRS set to EPSG:4326.
    """
    geom_col = "geom_4326"
    cols_non_geom = ", ".join(non_geom_cols(schema, table))
    select_cols = (cols_non_geom + ", ") if cols_non_geom else ""
    if has_column(schema, table, geom_col):
        geom_expr = f'"{geom_col}"'
    else:
        geom_expr = f'ST_Transform("geom", 4326) AS {geom_col}'
    sql = text(f'SELECT {select_cols}{geom_expr} FROM "{schema}"."{table}"')
    with ENGINE.connect() as con:
        gdf = gpd.read_postgis(sql, con, geom_col=geom_col)

    def _to_text(value):
        # pd.isna() is only applied to scalars: on list/dict/array cells it
        # returns an array whose truth value is ambiguous.
        if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
            return None
        return str(value)

    # Make unusual dtypes strings so OGR keeps them
    for c in gdf.columns:
        if c == geom_col:
            continue
        column = gdf[c]
        if (
            pd.api.types.is_integer_dtype(column)
            or pd.api.types.is_float_dtype(column)
            or pd.api.types.is_bool_dtype(column)
            or pd.api.types.is_string_dtype(column)
        ):
            continue
        gdf[c] = column.map(_to_text)

    return gdf.set_crs(4326, allow_override=True)

def save_layer(gdf, gpkg_path, layer_name):
    """Write *gdf* as layer *layer_name* of the GeoPackage at *gpkg_path*.

    An empty frame is skipped with a message. The file is opened in write mode
    if it does not exist yet and in append mode otherwise. Colons in column
    names are replaced by underscores before writing.
    """
    if gdf.empty:
        print(f" - skip {layer_name}: empty")
        return

    # Decide the mode before writing: first layer creates the file, later ones append.
    if os.path.exists(gpkg_path):
        write_mode = "a"
    else:
        write_mode = "w"

    # Light name tidy so QGIS is happy
    renames = {}
    for column in gdf.columns:
        renames[column] = column.replace(":", "_")
    tidy = gdf.rename(columns=renames)

    tidy.to_file(gpkg_path, layer=layer_name, driver="GPKG", mode=write_mode)
    print(f" + wrote {layer_name} -> {gpkg_path} ({len(tidy)} features)")

def match_any(name, pats):
    """Return True if any regex in *pats* matches somewhere in *name*, ignoring case."""
    for pattern in pats:
        if re.search(pattern, name, re.IGNORECASE) is not None:
            return True
    return False

# ========= Discover patterns =========
# Regexes tested against spatial table names with match_any(), i.e. as
# case-insensitive substring searches. A table is treated as an origin or a
# destination layer if at least one pattern of the respective list matches.
# Several entries are subsumed by shorter ones (e.g. "origins" by "origin").
ORIGIN_PATTERNS = [r"origin", r"origins", r"sample_point", r"population_point", r"grid_origin", r"pop_grid"]
# NOTE(review): the short patterns match as substrings ("park" also hits
# "parking", "rail" hits "trail", "bus" hits "business"), so unrelated tables can
# be picked up. Conversely, r"\bpos\b" cannot match "pos" adjacent to an
# underscore (e.g. "pos_areas"), because "_" is a regex word character.
# Confirm the selection against the printed candidate lists.
DEST_PATTERNS = [
    r"dest", r"destination",
    r"fresh[_\- ]?food", r"grocery", r"supermarket", r"market",
    r"public[_\- ]?open[_\- ]?space", r"\bpos\b", r"open[_\- ]?space", r"park",
    r"gtfs", r"stop", r"stops", r"station", r"transit", r"pt[_\- ]?stop", r"tram", r"bus", r"rail",
]
# Despite the name these are not regexes: each entry is a fixed
# (schema, table, output layer name) triple consumed by export_network().
NETWORK_PATTERNS = [
    ("public", "edges", "edges"),
    ("public", "edges_simplified", "edges_simplified"),
    ("public", "nodes", "nodes"),
    ("public", "intersections_osmnx_12m", "intersections"),
]

# ========= Exporters =========
def export_network(gpkg_path=NETWORK_GPKG, schema="public"):
    """Write every table listed in NETWORK_PATTERNS to *gpkg_path*, one layer each.

    The schema of each table comes from its NETWORK_PATTERNS entry; the
    *schema* argument is accepted but not used. A table that cannot be read
    raises and stops the export.
    """
    for table_schema, table_name, layer_name in NETWORK_PATTERNS:
        save_layer(read_table(table_schema, table_name), gpkg_path, layer_name)

def export_origins_and_destinations(schema="public"):
    """Export origin-like and destination-like spatial tables to GeoPackages.

    Spatial tables in *schema* whose names match ORIGIN_PATTERNS are written
    to ORIG_GPKG, and those matching DEST_PATTERNS to DEST_GPKG, one layer per
    table named after the table. A table matching both lists is written to
    both files. Empty tables are skipped.

    The export is best-effort: a table that fails to load or write is reported
    and the remaining tables are still processed.

    Args:
        schema: Database schema to search for spatial tables.
    """
    # list_spatial_tables() returns one row per (table, geometry column), so a
    # table with both "geom" and "geom_4326" registered appears twice. Collapse
    # to unique names; otherwise the second pass would append every feature to
    # the layer a second time.
    table_names = set(list_spatial_tables(schema)["name"])

    origin_tables = sorted(t for t in table_names if match_any(t, ORIGIN_PATTERNS))
    dest_tables = sorted(t for t in table_names if match_any(t, DEST_PATTERNS))

    print("Origin candidates:", origin_tables)
    print("Destination candidates:", dest_tables)

    export_plan = (
        ("origin", origin_tables, ORIG_GPKG),
        ("destination", dest_tables, DEST_GPKG),
    )
    for kind, tables, gpkg_path in export_plan:
        for t in tables:
            try:
                gdf = read_table(schema, t)
                if not gdf.empty:
                    save_layer(gdf, gpkg_path, t)
            except Exception as e:
                # Deliberately broad: one unreadable table must not abort the rest.
                print(f" ! {kind} '{t}' failed: {e}")

# ========= Run =========
# Run both exports, then report each output path and whether the file now
# exists. A GeoPackage is missing if none of its layers was written.
if __name__ == "__main__":
    print("Exporting network…")
    export_network()
    print("\nExporting origins/destinations…")
    export_origins_and_destinations()
    print("\nDone.")
    print(f"Network GPKG:      {NETWORK_GPKG}   (exists: {os.path.exists(NETWORK_GPKG)})")
    print(f"Origins GPKG:      {ORIG_GPKG}      (exists: {os.path.exists(ORIG_GPKG)})")
    print(f"Destinations GPKG: {DEST_GPKG}      (exists: {os.path.exists(DEST_GPKG)})")


In [None]:
import os
import shutil
from typing import Optional, Dict

# ========= Helper =========
def copy_outputs_to_container(
    codename: str,
    network_gpkg: str,
    origins_gpkg: str,
    destinations_gpkg: str,
    base_dst: str = "/home/ghsci/process/data/_study_region_outputs",
) -> Dict[str, Optional[str]]:
    """Copy the exported GeoPackages into ``<base_dst>/<codename>``.

    The destination folder is created if needed. Each source keeps its file
    name. A source that is empty/None or does not exist is reported and
    skipped. A source that already is the destination file is left untouched
    rather than failing with ``shutil.SameFileError``.

    Args:
        codename: Study-region identifier, used as the destination sub-folder.
        network_gpkg: Path of the network GeoPackage.
        origins_gpkg: Path of the origins GeoPackage.
        destinations_gpkg: Path of the destinations GeoPackage.
        base_dst: Parent folder that holds the per-region output folders.

    Returns:
        Mapping from each source path to its destination path, or to ``None``
        when the source was not found.
    """
    dst_dir = os.path.join(base_dst, codename)
    os.makedirs(dst_dir, exist_ok=True)

    results: Dict[str, Optional[str]] = {}
    for src in (network_gpkg, origins_gpkg, destinations_gpkg):
        if not src or not os.path.exists(src):
            print(f"✗ Not found: {src}")
            results[src] = None
            continue

        label = os.path.basename(src)
        dst = os.path.join(dst_dir, label)
        # Source and destination can be the same file (e.g. both folders map to
        # the same mount); copying a file onto itself raises SameFileError.
        if os.path.exists(dst) and os.path.samefile(src, dst):
            print(f"Already in place: {dst}")
        else:
            shutil.copy2(src, dst)
            print(f"Copied {label} -> {dst}")
        results[src] = dst

    print(f"\nDone. Target folder: {dst_dir}")
    return results

# ========= Run =========
# Copy the three GeoPackages written by the export cell into the default
# destination folder for this region (base_dst/<codename>).
copy_outputs_to_container(codename, NETWORK_GPKG, ORIG_GPKG, DEST_GPKG)
