In [None]:
from pathlib import Path
import geopandas as gpd
import matplotlib.pyplot as plt

from geoworkflow.processors.extraction.open_buildings_gcs import OpenBuildingsGCSProcessor
from geoworkflow.schemas.open_buildings_gcs_config import OpenBuildingsGCSConfig

import logging

# Set the 'geoworkflow' logger's threshold to DEBUG.
logging.getLogger(name='geoworkflow').setLevel(level=logging.DEBUG)

In [None]:
from pathlib import Path
from geoworkflow.core.logging_setup import setup_logging
from geoworkflow.processors.extraction.open_buildings_gcs import OpenBuildingsGCSProcessor
from geoworkflow.schemas.open_buildings_gcs_config import OpenBuildingsGCSConfig

# Configure logging to save to a file and also echo to the console.
log_file = Path("logs/gcs_buildings_batch.log")  # Your logs will go here
# Create the log directory up front. NOTE(review): SOURCE does not show whether
# setup_logging creates missing parent directories; this call is a no-op when
# the directory already exists, so it is safe either way.
log_file.parent.mkdir(parents=True, exist_ok=True)

setup_logging(
    level="DEBUG",
    log_file=log_file,
    enable_console=True  # Also show in console
)

# Directory that receives the extracted building files (relative to the
# notebook's working directory).
output_dir = Path("../../data/01_extracted/buildings")
output_dir.mkdir(parents=True, exist_ok=True)

# Create config for batch processing
config = OpenBuildingsGCSConfig(
    aoi_file="africapolis",  # Batch mode
    output_dir=output_dir,
    country=["GHA", "TGO"],  # Ghana and Togo
    # city=["Accra", "Kumasi"],  # Optional: filter specific cities
    confidence_threshold=0.75,
    min_area_m2=10.0,
    export_format="geojson",
    num_workers=4,
    include_confidence=True,
    include_area=True,
    overwrite_existing=False
)

# Run extraction; `result` is consumed by the inspection/visualization cells below.
processor = OpenBuildingsGCSProcessor(config)
result = processor.process()
print(result)

In [None]:
# Optional: Inspect batch results
# Prints a summary of the batch run stored in `result` (produced by the
# extraction cell): counts, succeeded names, per-city errors, and the number of
# output files.
if result.success:
    # The check mark was previously mis-encoded (UTF-8 bytes read as cp1252).
    print("\n✅ Batch Processing Results:")
    print(f"Total cities processed: {result.total_count}")
    print(f"Successful: {result.succeeded_count}")
    print(f"Failed: {result.failed_count}")

    if result.succeeded:
        print(f"\nSucceeded cities: {', '.join(result.succeeded)}")

    if result.failed:
        print("\nFailed cities:")
        # `failed` is iterated as a mapping of city -> error.
        for city, error in result.failed.items():
            print(f"  - {city}: {error}")

    print(f"\nOutput files: {len(result.output_files)}")
else:
    # Avoid a silent no-op when the batch did not succeed.
    print("\nBatch processing did not succeed; inspect `result` for details.")

In [None]:
# Optional: Load and visualize one of the results
# Reads the first output file from `result`, prints its size and columns, and
# plots the building footprints (colored by confidence when available).
if result.success and result.output_files:
    # Load the first output's buildings. Path() is a no-op for Path entries and
    # also tolerates plain string paths.
    first_output = Path(result.output_files[0])
    buildings = gpd.read_file(first_output)

    # The parent directory name is used as the label
    # (presumably the city name -- verify against the processor's output layout).
    source_label = first_output.parent.name

    print(f"Loaded {len(buildings)} buildings from {source_label}")
    print(f"Columns: {list(buildings.columns)}")

    if buildings.empty:
        # Nothing to draw; skip the figure instead of rendering an empty plot.
        print("No buildings to plot.")
    else:
        # Quick visualization
        fig, ax = plt.subplots(1, 1, figsize=(12, 10))
        if 'confidence' in buildings.columns:
            buildings.plot(ax=ax, column='confidence', cmap='viridis', legend=True)
        else:
            # The file may lack a confidence attribute (e.g. written with
            # include_confidence disabled); fall back to a plain footprint plot.
            buildings.plot(ax=ax)
        ax.set_title(f"Buildings - {source_label}")
        plt.tight_layout()
        plt.show()