In [1]:
from pathlib import Path
import geopandas as gpd
import matplotlib.pyplot as plt

from geoworkflow.processors.extraction.osm_highways import OSMHighwaysProcessor
from geoworkflow.schemas.osm_highways_config import OSMHighwaysConfig

import logging

# Raise verbosity for the 'geoworkflow' logger; per the standard logging
# hierarchy, child loggers without their own level inherit this one.
geoworkflow_logger = logging.getLogger('geoworkflow')
geoworkflow_logger.setLevel(logging.DEBUG)



In [2]:
from pathlib import Path
from geoworkflow.core.logging_setup import setup_logging
from geoworkflow.processors.extraction.osm_highways import OSMHighwaysProcessor
from geoworkflow.schemas.osm_highways_config import OSMHighwaysConfig

# Configure logging: DEBUG records go to a file (timing logs) and are also
# echoed to the console / notebook output.
log_file = Path("logs/osm_highways_timing.log")
# Make sure the log directory exists before a file handler tries to open the
# log file. This mirrors how output_dir is prepared below and is a no-op when
# the directory is already there (or if setup_logging creates it itself).
log_file.parent.mkdir(parents=True, exist_ok=True)
setup_logging(
    level="DEBUG",
    log_file=log_file,  # Your timing logs will go here
    enable_console=True,  # Also show in console
)

# Destination for the extracted highway data; created up front so the
# processor can write into it.
output_dir = Path("../../data/01_extracted/highways_")
output_dir.mkdir(parents=True, exist_ok=True)

# Create config
config = OSMHighwaysConfig(
    aoi_file="africapolis",
    output_dir=output_dir,
    highway_types="all",  # Extract all highway types
    include_attributes=["highway", "name", "surface", "lanes"],
    country=["GHA", "TGO"],  # Country filter, echoed as "Filters: country=[...]" in the run log
    export_format="geojson",
    force_redownload=False,  # Presumably reuses cached downloads (the run log reports a cached PBF) - confirm
    max_cache_age_days=30,  # Warn if data >30 days old
)

# Run extraction and show the processor's result summary.
processor = OSMHighwaysProcessor(config)
result = processor.process()
print(result)

2025-11-15 11:47:21,074 - geoworkflow.OSMHighwaysProcessor - INFO - Filters: country=['GHA', 'TGO'], city=None
2025-11-15 11:47:21,180 - geoworkflow.OSMHighwaysProcessor - INFO - Processing with 4 parallel threads (shared memory)
2025-11-15 11:47:21,182 - geoworkflow.OSMHighwaysProcessor - INFO -   Loading PBF for GHA...
2025-11-15 11:47:22,333 - geoworkflow.utils.geofabrik_utils - INFO - Using cached PBF: /Users/juancheeto/Library/CloudStorage/Box-Box/UrbanStructureStudies/AfricaProject/data/.cache/osm/ghana-latest.osm.pbf (downloaded 10 days ago, 101.9 MB)


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  edges, nodes = prepare_geodataframe(


2025-11-15 11:48:47,703 - geoworkflow.OSMHighwaysProcessor - INFO -   Loaded 370,141 highway segments
2025-11-15 11:48:47,705 - geoworkflow.OSMHighwaysProcessor - INFO -   Processing 228 geometries for GHA...
2025-11-15 11:48:47,799 - geoworkflow.utils.osm_utils - INFO - Bounding box pre-filter: 370141 → 6646 (98.2% reduction)
2025-11-15 11:48:47,804 - geoworkflow.utils.osm_utils - INFO - Bounding box pre-filter: 370141 → 11923 (96.8% reduction)
2025-11-15 11:48:47,858 - geoworkflow.utils.osm_utils - INFO - Bounding box pre-filter: 370141 → 50991 (86.2% reduction)
2025-11-15 11:48:47,903 - geoworkflow.utils.osm_utils - INFO - R-tree spatial index: 6646 → 6207 (6.6% additional reduction)
2025-11-15 11:48:47,918 - geoworkflow.utils.osm_utils - INFO - Bounding box pre-filter: 370141 → 117408 (68.3% reduction)
2025-11-15 11:48:47,925 - geoworkflow.utils.osm_utils - INFO - R-tree spatial index: 11923 → 11439 (4.1% additional reduction)
2025-11-15 11:48:48,008 - geoworkflow.utils.osm_utils -