diff --git a/README.md b/README.md index 403fd44..102b99f 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,188 @@ -# raw-data-api-py -A Python API wrapper for the raw-data-api service. +# OSM Data Client + +A Python client for the Humanitarian OpenStreetMap Team (HOT) Raw Data API. + +## Installation + +```bash +pip install raw-data-api-py +``` + +## Conceptual Overview + +The OSM Data Client allows you to extract OpenStreetMap data for specific geographic areas through the HOT Raw Data API. The workflow follows this pattern: + +1. Define an area of interest (GeoJSON polygon) +2. Configure filters for specific OpenStreetMap features +3. Submit a request and wait for processing +4. Download and use the resulting data + +## Quick Start + +```python +import asyncio +from osm_data_client import get_osm_data + +async def main(): + # Define area of interest + geometry = { + "type": "Polygon", + "coordinates": [[ + [-73.98, 40.75], # NYC area + [-73.98, 40.76], + [-73.97, 40.76], + [-73.97, 40.75], + [-73.98, 40.75] + ]] + } + + # Request building data + result = await get_osm_data( + geometry, + fileName="nyc_buildings", + outputType="geojson", + filters={ + "tags": { + "all_geometry": { + "building": [] # All buildings + } + } + } + ) + + print(f"Data downloaded to: {result.path}") + +if __name__ == "__main__": + asyncio.run(main()) +``` + +## Command-Line Interface + +Extract data using the CLI: + +```bash +python -m osm_data_client.cli --bounds -73.98 40.75 -73.97 40.76 --feature-type building --out buildings.geojson +``` + +## Key Components + +- **get_osm_data**: Main function for simple requests +- **RawDataClient**: Configurable client for advanced usage +- **GeometryInput**: Handles polygon validation +- **RequestParams**: Handles request configuration +- **RawDataResult**: Contains the result file path and metadata + +## Common Use Cases + +### Configuring Output Directory + +```python +from osm_data_client import RawDataClient, RawDataClientConfig + +config = 
RawDataClientConfig(output_directory="/path/to/outputs") +client = RawDataClient(config) + +result = await client.get_osm_data(geometry, **params) +``` + +### Controlling File Extraction + +```python +from osm_data_client import RawDataOutputOptions, AutoExtractOption + +# Always extract from zip archives +options = RawDataOutputOptions(auto_extract=AutoExtractOption.force_extract) + +result = await client.get_osm_data(geometry, options, **params) +``` + +### Using Different Output Formats + +```python +# GeoJSON example +result = await get_osm_data( + geometry, + outputType="geojson", + filters={"tags": {"all_geometry": {"building": []}}} +) + +# Shapefile example +result = await get_osm_data( + geometry, + outputType="shp", + filters={"tags": {"all_geometry": {"highway": []}}} +) +``` + +## Error Handling + +The client uses specific exception types for different errors: + +```python +from osm_data_client.exceptions import ValidationError, APIRequestError + +try: + result = await get_osm_data(geometry, **params) +except ValidationError as e: + print(f"Invalid input: {e}") +except APIRequestError as e: + print(f"API error: {e}") +``` + +## API Reference + +### Core Functions + +```python +async def get_osm_data( + geometry: Union[Dict[str, Any], str], + **kwargs +) -> RawDataResult +``` + +### Client Classes + +```python +class RawDataClient: + async def get_osm_data( + self, + geometry: Union[Dict[str, Any], str], + output_options: RawDataOutputOptions = RawDataOutputOptions.default(), + **kwargs + ) -> RawDataResult +``` + +### Configuration Classes + +```python +@dataclass +class RawDataClientConfig: + access_token: Optional[str] = None + memory_threshold_mb: int = 50 + base_api_url: str = "https://api-prod.raw-data.hotosm.org/v1" + output_directory: Path = Path.cwd() +``` + +```python +class AutoExtractOption(Enum): + automatic = auto() # Decide based on format and size + force_zip = auto() # Always keep as zip + force_extract = auto() # Always extract +``` + 
+## CLI Options + +``` +python -m osm_data_client.cli [options] + +Options: + --geojson PATH Path to GeoJSON file or GeoJSON string + --bounds XMIN YMIN XMAX YMAX + Bounding box coordinates (assumed to be in EPSG:4326) + --feature-type TYPE Type of feature to download (default: "building") + --out PATH Output path (default: "./osm_data.geojson") + --format FORMAT Output format (geojson, shp, kml, etc.) + --no-zip Do not request data as a zip file + --extract Extract files from zip archive + --verbose, -v Enable verbose logging +``` diff --git a/osm_data_client/cli.py b/osm_data_client/cli.py index 35384d1..953254f 100644 --- a/osm_data_client/cli.py +++ b/osm_data_client/cli.py @@ -56,9 +56,9 @@ async def run_cli(args: argparse.Namespace) -> int: """ try: # Prepare geometry - if args.bbox: - log.info("Using bounding box: %s", args.bbox) - geometry = bbox_to_polygon(*args.bbox) + if args.bounds: + log.info("Using bounding box: %s", args.bounds) + geometry = bbox_to_polygon(*args.bounds) else: geojson_path = Path(args.geojson) if geojson_path.exists(): @@ -150,11 +150,11 @@ def main() -> int: "--geojson", type=str, help="Path to GeoJSON file or GeoJSON string." 
) group.add_argument( - "--bbox", + "--bounds", nargs=4, type=float, metavar=("xmin", "ymin", "xmax", "ymax"), - help="Bounding box coordinates (assumed to be in EPSG:4326).", + help="Bounds coordinates (assumed to be in EPSG:4326).", ) parser.add_argument( @@ -224,8 +224,8 @@ def main() -> int: print(f"OSM Data Client version {ver}") return 0 - if not args.geojson and not args.bbox: - parser.error("one of the arguments --geojson --bbox is required") + if not args.geojson and not args.bounds: + parser.error("one of the arguments --geojson --bounds is required") # Run the CLI asynchronously return asyncio.run(run_cli(args)) diff --git a/tests/test_cli.py b/tests/test_cli.py index e3c62a9..3469511 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -98,13 +98,13 @@ def test_missing_required_args(self): result = self.run_cli_command([], check=False) assert result.returncode != 0 - assert "--geojson" in result.stderr and "--bbox" in result.stderr + assert "--geojson" in result.stderr and "--bounds" in result.stderr @pytest.mark.skipif( os.environ.get("SKIP_API_TESTS") == "1", reason="Skipping tests that require API access" ) - def test_bbox_download(self): + def test_bounds_download(self): """Test downloading data for a bounding box.""" output_file = OUTPUT_DIR / "bbox_test.geojson" @@ -113,7 +113,7 @@ def test_bbox_download(self): output_file.unlink() result = self.run_cli_command([ - "--bbox", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]), + "--bounds", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]), "--feature-type", "building", "--out", str(output_file) ]) @@ -168,7 +168,7 @@ def test_extract_option(self): output_file.unlink() result = self.run_cli_command([ - "--bbox", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]), + "--bounds", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]), "--feature-type", "building", "--out", str(output_file), "--extract" # 
Force extraction @@ -203,7 +203,7 @@ def test_no_zip_option(self): output_file.unlink() result = self.run_cli_command([ - "--bbox", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]), + "--bounds", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]), "--feature-type", "building", "--out", str(output_file), "--no-zip" # Request unzipped data @@ -237,7 +237,7 @@ def test_different_formats_cli(self): shutil.rmtree(csv_dir) result = self.run_cli_command([ - "--bbox", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]), + "--bounds", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]), "--feature-type", "building", "--out", str(output_file), "--format", "csv" # Test CSV format @@ -279,7 +279,7 @@ def test_custom_api_url(self): output_file.unlink() result = self.run_cli_command([ - "--bbox", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]), + "--bounds", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]), "--feature-type", "building", "--out", str(output_file), "--api-url", "https://api-prod.raw-data.hotosm.org/v1" # Explicit API URL