Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
190 changes: 188 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,188 @@
# raw-data-api-py
A Python API wrapper for the raw-data-api service.
# OSM Data Client

A Python client for the Humanitarian OpenStreetMap Team (HOT) Raw Data API.

## Installation

```bash
pip install raw-data-api-py
```

## Conceptual Overview

The OSM Data Client allows you to extract OpenStreetMap data for specific geographic areas through the HOT Raw Data API. The workflow follows this pattern:

1. Define an area of interest (GeoJSON polygon)
2. Configure filters for specific OpenStreetMap features
3. Submit a request and wait for processing
4. Download and use the resulting data

## Quick Start

```python
import asyncio
from osm_data_client import get_osm_data

async def main():
    # Define area of interest
    geometry = {
        "type": "Polygon",
        "coordinates": [[
            [-73.98, 40.75],  # NYC area
            [-73.98, 40.76],
            [-73.97, 40.76],
            [-73.97, 40.75],
            [-73.98, 40.75]
        ]]
    }

    # Request building data
    result = await get_osm_data(
        geometry,
        fileName="nyc_buildings",
        outputType="geojson",
        filters={
            "tags": {
                "all_geometry": {
                    "building": []  # All buildings
                }
            }
        }
    )

    print(f"Data downloaded to: {result.path}")

if __name__ == "__main__":
    asyncio.run(main())
```

## Command-Line Interface

Extract data using the CLI:

```bash
python -m osm_data_client.cli --bounds -73.98 40.75 -73.97 40.76 --feature-type building --out buildings.geojson
```

## Key Components

- **get_osm_data**: Main function for simple requests
- **RawDataClient**: Configurable client for advanced usage
- **GeometryInput**: Handles polygon validation
- **RequestParams**: Handles request configuration
- **RawDataResult**: Contains the result file path and metadata

## Common Use Cases

### Configuring Output Directory

```python
from osm_data_client import RawDataClient, RawDataClientConfig

config = RawDataClientConfig(output_directory="/path/to/outputs")
client = RawDataClient(config)

result = await client.get_osm_data(geometry, **params)
```

### Controlling File Extraction

```python
from osm_data_client import RawDataOutputOptions, AutoExtractOption

# Always extract from zip archives
options = RawDataOutputOptions(auto_extract=AutoExtractOption.force_extract)

result = await client.get_osm_data(geometry, options, **params)
```

### Using Different Output Formats

```python
# GeoJSON example
result = await get_osm_data(
geometry,
outputType="geojson",
filters={"tags": {"all_geometry": {"building": []}}}
)

# Shapefile example
result = await get_osm_data(
geometry,
outputType="shp",
filters={"tags": {"all_geometry": {"highway": []}}}
)
```

## Error Handling

The client uses specific exception types for different errors:

```python
from osm_data_client.exceptions import ValidationError, APIRequestError

try:
    result = await get_osm_data(geometry, **params)
except ValidationError as e:
    print(f"Invalid input: {e}")
except APIRequestError as e:
    print(f"API error: {e}")
```

## API Reference

### Core Functions

```python
async def get_osm_data(
    geometry: Union[Dict[str, Any], str],
    **kwargs
) -> RawDataResult
```

### Client Classes

```python
class RawDataClient:
    async def get_osm_data(
        self,
        geometry: Union[Dict[str, Any], str],
        output_options: RawDataOutputOptions = RawDataOutputOptions.default(),
        **kwargs
    ) -> RawDataResult
```

### Configuration Classes

```python
@dataclass
class RawDataClientConfig:
    access_token: Optional[str] = None
    memory_threshold_mb: int = 50
    base_api_url: str = "https://api-prod.raw-data.hotosm.org/v1"
    output_directory: Path = Path.cwd()
```

```python
class AutoExtractOption(Enum):
    automatic = auto()      # Decide based on format and size
    force_zip = auto()      # Always keep as zip
    force_extract = auto()  # Always extract
```

## CLI Options

```
python -m osm_data_client.cli [options]

Options:
  --geojson PATH         Path to GeoJSON file or GeoJSON string
  --bounds XMIN YMIN XMAX YMAX
                         Bounds coordinates in EPSG:4326
  --feature-type TYPE    Type of feature to download (default: "building")
  --out PATH             Output path (default: "./osm_data.geojson")
  --format FORMAT        Output format (geojson, shp, kml, etc.)
  --no-zip               Do not request data as a zip file
  --extract              Extract files from zip archive
  --verbose, -v          Enable verbose logging
```
14 changes: 7 additions & 7 deletions osm_data_client/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ async def run_cli(args: argparse.Namespace) -> int:
"""
try:
# Prepare geometry
if args.bbox:
log.info("Using bounding box: %s", args.bbox)
geometry = bbox_to_polygon(*args.bbox)
if args.bounds:
log.info("Using bounding box: %s", args.bounds)
geometry = bbox_to_polygon(*args.bounds)
else:
geojson_path = Path(args.geojson)
if geojson_path.exists():
Expand Down Expand Up @@ -150,11 +150,11 @@ def main() -> int:
"--geojson", type=str, help="Path to GeoJSON file or GeoJSON string."
)
group.add_argument(
"--bbox",
"--bounds",
nargs=4,
type=float,
metavar=("xmin", "ymin", "xmax", "ymax"),
help="Bounding box coordinates (assumed to be in EPSG:4326).",
help="Bounds coordinates (assumed to be in EPSG:4326).",
)

parser.add_argument(
Expand Down Expand Up @@ -224,8 +224,8 @@ def main() -> int:
print(f"OSM Data Client version {ver}")
return 0

if not args.geojson and not args.bbox:
parser.error("one of the arguments --geojson --bbox is required")
if not args.geojson and not args.bounds:
parser.error("one of the arguments --geojson --bounds is required")

# Run the CLI asynchronously
return asyncio.run(run_cli(args))
Expand Down
14 changes: 7 additions & 7 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,13 @@ def test_missing_required_args(self):
result = self.run_cli_command([], check=False)

assert result.returncode != 0
assert "--geojson" in result.stderr and "--bbox" in result.stderr
assert "--geojson" in result.stderr and "--bounds" in result.stderr

@pytest.mark.skipif(
os.environ.get("SKIP_API_TESTS") == "1",
reason="Skipping tests that require API access"
)
def test_bbox_download(self):
def test_bounds_download(self):
"""Test downloading data for a bounding box."""
output_file = OUTPUT_DIR / "bbox_test.geojson"

Expand All @@ -113,7 +113,7 @@ def test_bbox_download(self):
output_file.unlink()

result = self.run_cli_command([
"--bbox", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]),
"--bounds", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]),
"--feature-type", "building",
"--out", str(output_file)
])
Expand Down Expand Up @@ -168,7 +168,7 @@ def test_extract_option(self):
output_file.unlink()

result = self.run_cli_command([
"--bbox", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]),
"--bounds", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]),
"--feature-type", "building",
"--out", str(output_file),
"--extract" # Force extraction
Expand Down Expand Up @@ -203,7 +203,7 @@ def test_no_zip_option(self):
output_file.unlink()

result = self.run_cli_command([
"--bbox", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]),
"--bounds", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]),
"--feature-type", "building",
"--out", str(output_file),
"--no-zip" # Request unzipped data
Expand Down Expand Up @@ -237,7 +237,7 @@ def test_different_formats_cli(self):
shutil.rmtree(csv_dir)

result = self.run_cli_command([
"--bbox", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]),
"--bounds", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]),
"--feature-type", "building",
"--out", str(output_file),
"--format", "csv" # Test CSV format
Expand Down Expand Up @@ -279,7 +279,7 @@ def test_custom_api_url(self):
output_file.unlink()

result = self.run_cli_command([
"--bbox", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]),
"--bounds", str(TINY_BBOX[0]), str(TINY_BBOX[1]), str(TINY_BBOX[2]), str(TINY_BBOX[3]),
"--feature-type", "building",
"--out", str(output_file),
"--api-url", "https://api-prod.raw-data.hotosm.org/v1" # Explicit API URL
Expand Down