In [1]:
#!/usr/bin/env python3
"""
Download and prepare a drive-only OSM network around Hospital do Barreiro.

Outputs in the 'data' directory:
    - drive_nodes.parquet
    - drive_edges.parquet

These files are in the format expected by the Streamlit routing app.
"""

from pathlib import Path

import geopandas as gpd
import networkx as nx
import osmnx as ox


# Output directory for the generated Parquet files (relative to the
# current working directory).
DATA_DIR: Path = Path("data")

# Hospital Nossa Senhora do Rosario, Barreiro
# Stored as (latitude, longitude), the order download_drive_graph documents
# for its center_point argument.
HOSPITAL_BARREIRO_CENTER: tuple[float, float] = (38.657111, -9.059832)
# Extraction distance around the center point, in meters (30 km).
RADIUS_METERS: int = 30_000


def configure_osmnx() -> None:
    """
    Apply the global OSMnx settings this script relies on.

    Turns on the OSMnx cache (``use_cache``) and console logging
    (``log_console``). The settings live on the shared ``ox.settings``
    module, so they affect every later OSMnx call in the process.
    """
    wanted_settings = (
        ("use_cache", True),
        ("log_console", True),
    )
    for option_name, option_value in wanted_settings:
        setattr(ox.settings, option_name, option_value)


def download_drive_graph(
    center_point: tuple[float, float],
    dist_meters: int,
) -> nx.MultiDiGraph:
    """
    Fetch the simplified OSM drive network surrounding a point.

    Args:
        center_point: Center of the extraction as (latitude, longitude).
        dist_meters: Extraction distance around the center, in meters.

    Returns:
        The MultiDiGraph returned by ``ox.graph_from_point`` for the
        "drive" network type, with simplification enabled.
    """
    return ox.graph_from_point(
        center_point,
        dist=dist_meters,
        network_type="drive",
        simplify=True,
    )


def graph_to_parquet(
    graph: nx.MultiDiGraph,
    output_dir: Path,
    prefix: str = "drive",
) -> None:
    """
    Project graph to a metric CRS, convert to GeoDataFrames, and write Parquet.

    This produces:
        output_dir / f"{prefix}_nodes.parquet"
        output_dir / f"{prefix}_edges.parquet"

    The nodes file has a "node_id" column (copied from the node index) plus
    whichever of "x", "y", and "geometry" are present on the nodes frame.
    The edges file has exactly "u", "v", "length", and "geometry".

    Args:
        graph: Network graph to export.
        output_dir: Directory to write into; created (with parents) if it
            does not exist.
        prefix: File-name prefix for both outputs; also used in the
            progress messages.

    Raises:
        RuntimeError: If the edges frame lacks any of "u", "v", "length",
            or "geometry".
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # Project to a local metric CRS
    graph_proj: nx.MultiDiGraph = ox.project_graph(graph)

    # Convert to GeoDataFrames
    nodes, edges = ox.graph_to_gdfs(graph_proj)

    # NODES: expose the index as an explicit node_id column while keeping
    # the index itself untouched.
    nodes = nodes.assign(node_id=nodes.index)

    # EDGES: reset index so u, v, key become columns
    edges = edges.reset_index(drop=False)

    # Columns required by the Streamlit app
    node_cols = ["node_id", "x", "y", "geometry"]
    edge_cols = ["u", "v", "length", "geometry"]

    # Validate edges before selecting. Without "geometry" the column subset
    # would be a plain DataFrame and a non-geo Parquet file would be written
    # silently, contradicting the documented output format.
    for col in edge_cols:
        if col not in edges.columns:
            raise RuntimeError(f"edges Parquet must contain column '{col}'.")

    # Node attributes other than node_id stay best-effort, as before;
    # node_id itself is always present because it was assigned above.
    node_cols_existing = [col for col in node_cols if col in nodes.columns]

    nodes_out: gpd.GeoDataFrame = nodes[node_cols_existing].copy()
    edges_out: gpd.GeoDataFrame = edges[edge_cols].copy()

    # Write Parquet
    nodes_path: Path = output_dir / f"{prefix}_nodes.parquet"
    edges_path: Path = output_dir / f"{prefix}_edges.parquet"

    nodes_out.to_parquet(nodes_path)
    edges_out.to_parquet(edges_path)

    # Report using the actual prefix so the messages stay accurate when the
    # caller exports something other than the default "drive" network.
    print(f"Saved {prefix} nodes to {nodes_path}")
    print(f"Saved {prefix} edges to {edges_path}")
    print(f"Number of nodes: {len(nodes_out)}")
    print(f"Number of edges: {len(edges_out)}")


def main() -> None:
    """
    Run the full pipeline: set up OSMnx, fetch the Barreiro drive graph,
    and write it out as Parquet files under DATA_DIR.

    Progress is reported on stdout at each stage.
    """
    configure_osmnx()

    center = HOSPITAL_BARREIRO_CENTER
    radius = RADIUS_METERS

    print("Downloading drive network around Hospital do Barreiro...")
    print(f"Center: {center}, radius: {radius} m")
    drive_graph = download_drive_graph(center, radius)

    node_count = drive_graph.number_of_nodes()
    edge_count = drive_graph.number_of_edges()
    print(f"Downloaded graph with {node_count} nodes and {edge_count} edges")

    print("Exporting drive network to Parquet...")
    graph_to_parquet(drive_graph, DATA_DIR, prefix="drive")
    print("All done.")


# Run the download/export pipeline only when executed as a script (or as a
# notebook cell), not when this module is imported.
if __name__ == "__main__":
    main()


Downloading drive network around Hospital do Barreiro...
Center: (38.657111, -9.059832), radius: 30000 m
Downloaded graph with 86903 nodes and 194325 edges
Exporting drive network to Parquet...
Saved drive nodes to data\drive_nodes.parquet
Saved drive edges to data\drive_edges.parquet
Number of nodes: 86903
Number of edges: 194325
All done.
