# OSM Highway Extraction Example

This notebook demonstrates how to extract highway networks from OpenStreetMap
using the OSM Highways processor.

## Features
- Download regional OSM data from Geofabrik
- Extract highways for any area of interest (AOI)
- Filter by highway type
- Export to multiple formats
- Track data freshness (age of the cached OSM downloads)

In [None]:
# Imports
from pathlib import Path
import geopandas as gpd
import matplotlib.pyplot as plt

from geoworkflow.processors.extraction.osm_highways import OSMHighwaysProcessor
from geoworkflow.schemas.osm_highways_config import OSMHighwaysConfig
from geoworkflow.utils.geofabrik_utils import list_cached_pbfs

## Example 1: Basic Extraction

Extract all highways for a city AOI.

In [None]:
# Input AOI file and the directory that receives the extracted highways
aoi_file = Path("../data/aoi/nairobi_aoi.geojson")
output_dir = Path("../data/extracted/highways")
output_dir.mkdir(parents=True, exist_ok=True)

# Collect the extraction settings, then build the config from them
extraction_settings = {
    "aoi_file": aoi_file,
    "output_dir": output_dir,
    "geofabrik_regions": ["kenya"],  # Or None for auto-detect
    "highway_types": "all",  # Extract all highway types
    "include_attributes": ["highway", "name", "surface", "lanes"],
    "export_format": "geojson",
    "max_cache_age_days": 30,  # Warn if data >30 days old
}
config = OSMHighwaysConfig(**extraction_settings)

# Run the processor; `processor` and `result` are reused by later cells
processor = OSMHighwaysProcessor(config)
result = processor.process()

# Report the outcome of the run
print(f"Success: {result.success}")
print(f"Extracted: {result.processed_count} highway segments")
print(f"Output: {processor.output_file}")

## Example 2: Major Roads Only

Extract only major roads (the `motorway`, `trunk`, `primary`, `secondary`, and `tertiary` classes).

In [None]:
# Restrict the extraction to the major road classes.
# Reuses `aoi_file` and `output_dir` from the Example 1 cell.
# NOTE(review): this writes into the same output_dir as Example 1 — confirm
# the processor's output file names do not collide.
major_road_types = ["motorway", "trunk", "primary", "secondary", "tertiary"]
major_road_attributes = ["highway", "name", "ref", "maxspeed"]

config_major = OSMHighwaysConfig(
    aoi_file=aoi_file,
    output_dir=output_dir,
    highway_types=major_road_types,
    include_attributes=major_road_attributes,
    export_format="geojson",
)

processor_major = OSMHighwaysProcessor(config_major)
result_major = processor_major.process()
print(f"Major roads extracted: {result_major.processed_count}")

## Example 3: Visualize Results

In [None]:
# Read the Example 1 output and the AOI back from disk
highways = gpd.read_file(processor.output_file)
aoi = gpd.read_file(aoi_file)

# One colour per highway class; classes not listed here are not drawn
highway_colors = {
    "motorway": "darkblue",
    "trunk": "blue",
    "primary": "green",
    "secondary": "orange",
    "tertiary": "yellow",
    "residential": "lightgray",
}

# Draw the AOI outline first, then overlay each highway class on the same axes
fig, ax = plt.subplots(figsize=(12, 12))
aoi.boundary.plot(ax=ax, color="red", linewidth=2, label="AOI")

for road_class, line_color in highway_colors.items():
    matching = highways[highways["highway"] == road_class]
    if len(matching) == 0:
        # Nothing of this class in the extract: skip it so it gets no legend entry
        continue
    matching.plot(ax=ax, color=line_color, linewidth=1, label=road_class)

ax.legend()
ax.set_title("Extracted Highway Network")
plt.show()

## Example 4: Check Cache Status

In [None]:
# Directory inspected for cached PBF downloads
cache_dir = Path.home().joinpath(".geoworkflow", "osm_cache")

print("Cached PBF files:")
# list_cached_pbfs yields (region, metadata) pairs for the given directory
for region_name, pbf_meta in list_cached_pbfs(cache_dir):
    size_mb = pbf_meta.file_size_mb
    age = pbf_meta.age_days()
    print(f"  {region_name}: {size_mb:.1f} MB, {age} days old")
    print(f"    Downloaded: {pbf_meta.download_date}")
    print(f"    Source: {pbf_meta.geofabrik_url}")

## Example 5: Multi-Region Extraction

Extract highways spanning multiple countries.

In [None]:
# Example: East African corridor
# Reuses `output_dir` from the Example 1 cell.
corridor_aoi = Path("../data/aoi/east_africa_corridor.geojson")
corridor_regions = ["kenya", "tanzania", "uganda"]
corridor_road_types = ["motorway", "trunk", "primary"]

config_multi = OSMHighwaysConfig(
    aoi_file=corridor_aoi,
    output_dir=output_dir,
    geofabrik_regions=corridor_regions,
    highway_types=corridor_road_types,
    export_format="geoparquet",  # Better for large datasets
)

processor_multi = OSMHighwaysProcessor(config_multi)
result_multi = processor_multi.process()
print(f"Multi-region extraction: {result_multi.processed_count} segments")

## Example 6: Export to Multiple Formats

In [None]:
# Export the same AOI to several formats, one sub-directory per format.
# Reuses `aoi_file` and `output_dir` from the Example 1 cell.
formats = ["geojson", "shapefile", "geoparquet"]

for fmt in formats:
    # Create the per-format directory up front, as Example 1 does for
    # output_dir, instead of relying on the processor to create it.
    fmt_dir = output_dir / fmt
    fmt_dir.mkdir(parents=True, exist_ok=True)

    config_fmt = OSMHighwaysConfig(
        aoi_file=aoi_file,
        output_dir=fmt_dir,
        geofabrik_regions=["kenya"],
        export_format=fmt,
    )

    processor_fmt = OSMHighwaysProcessor(config_fmt)
    result_fmt = processor_fmt.process()

    # Do not stat the output of a failed run: the file may not exist.
    if not result_fmt.success:
        print(f"{fmt}: extraction failed")
        continue

    # A shapefile is several files sharing one stem (.shp, .shx, .dbf, ...),
    # so add up every sibling with the output's stem rather than only the
    # file named by output_file. For single-file formats this is just that file.
    # NOTE(review): assumes output_file is a pathlib.Path — confirm.
    output_path = processor_fmt.output_file
    total_bytes = sum(
        sibling.stat().st_size
        for sibling in output_path.parent.iterdir()
        if sibling.stem == output_path.stem
    )

    file_size = total_bytes / (1024**2)
    print(f"{fmt}: {file_size:.2f} MB")