Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,24 @@ client = RawDataClient(config)
result = await client.get_osm_data(geometry, **params)
```

### Streaming Data Directly (No Download)

```python
from osm_data_client import RawDataOutputOptions

# Do not download the file, just return the response
options = RawDataOutputOptions(download_file=False)

result = await client.get_osm_data(geometry, options, **{
"outputType": "geojson",
"bindZip": False,
})
```

> [!NOTE]
> This configuration works best with `bindZip=False` and
> `geojson` output, as shown above.

### Controlling File Extraction

```python
Expand Down Expand Up @@ -178,7 +196,7 @@ except APIRequestError as e:

```python
async def get_osm_data(
geometry: Union[Dict[str, Any], str],
geometry: dict[str, Any] | str,
**kwargs
) -> RawDataResult
```
Expand All @@ -189,7 +207,7 @@ async def get_osm_data(
class RawDataClient:
async def get_osm_data(
self,
geometry: Union[Dict[str, Any], str],
geometry: dict[str, Any] | str,
output_options: RawDataOutputOptions = RawDataOutputOptions.default(),
**kwargs
) -> RawDataResult
Expand Down
32 changes: 21 additions & 11 deletions osm_data_client/client.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import asyncio
import json
from typing import Dict, Any, Union
from typing import Any
from aiohttp import ClientSession, ClientResponseError

from .models import (
Expand Down Expand Up @@ -41,7 +41,7 @@ def __init__(self, config: RawDataClientConfig = RawDataClientConfig.default()):

async def request_snapshot(
self, geometry: GeometryInput, params: RequestParams
) -> Dict[str, Any]:
) -> dict[str, Any]:
"""
Request a snapshot of OSM data.

Expand Down Expand Up @@ -95,7 +95,7 @@ async def request_snapshot(

async def poll_task_status(
self, task_link: str, polling_interval: int = 2
) -> Dict[str, Any]:
) -> dict[str, Any]:
"""
Poll the API to check task status until completion.

Expand Down Expand Up @@ -247,7 +247,7 @@ def __init__(self, config: RawDataClientConfig = RawDataClientConfig.default()):

async def get_osm_data(
self,
geometry: Union[Dict[str, Any], str],
geometry: dict[str, Any] | str,
output_options: RawDataOutputOptions = RawDataOutputOptions.default(),
**kwargs,
) -> RawDataResult:
Expand All @@ -265,7 +265,7 @@ async def get_osm_data(
- geometryType: List of geometry types to include

Returns:
Path to the downloaded data file or directory
Object containing metadata, plus a filepath or data.

Raises:
ValidationError: If inputs are invalid
Expand Down Expand Up @@ -308,18 +308,27 @@ async def get_osm_data(
metadata = RawDataApiMetadata.from_api_result(result, params)
log.debug("Data metadata: %s", metadata)

# Download the data
return await self.api.download_to_disk(metadata, output_options)
if output_options.download_file:
# Download the data
return await self.api.download_to_disk(metadata, output_options)

# Skip download and return directly
return RawDataResult(metadata=metadata, data=result.get("result", {}))

async def get_osm_data(geometry: Union[Dict[str, Any], str], **kwargs) -> RawDataResult:

async def get_osm_data(
geometry: dict[str, Any] | str,
output_options: RawDataOutputOptions = RawDataOutputOptions.default(),
**kwargs,
) -> RawDataResult:
"""
Get OSM data for a specified area.

This is a convenience wrapper around RawDataClient.get_osm_data.

Args:
geometry: GeoJSON geometry object or string
output_options: Options for controlling output behavior
**kwargs: Additional parameters for customizing the request
- fileName: Name for the export file (default: "osm_export")
- outputType: Format of the output (default: "geojson")
Expand All @@ -328,13 +337,14 @@ async def get_osm_data(geometry: Union[Dict[str, Any], str], **kwargs) -> RawDat
- geometryType: List of geometry types to include

Returns:
Path to the downloaded data file or directory
Object containing metadata, plus a filepath or data.

Raises:
ValidationError: If inputs are invalid
APIRequestError: If the API request fails
TaskPollingError: If polling the task status fails
DownloadError: If downloading data fails
"""
client = RawDataClient()
return await client.get_osm_data(geometry, **kwargs)
config = RawDataClientConfig.default()
client = RawDataClient(config=config)
return await client.get_osm_data(geometry, output_options, **kwargs)
27 changes: 16 additions & 11 deletions osm_data_client/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import json
from typing import Dict, Any, Union, Optional, List, TypedDict, Tuple
from typing import Any, Optional, TypedDict
from dataclasses import dataclass
from enum import Enum, auto
from pathlib import Path
Expand All @@ -11,19 +11,19 @@
class FilterDict(TypedDict, total=False):
"""TypedDict for filter specifications."""

tags: Dict[str, Any]
attributes: Dict[str, List[str]]
tags: dict[str, Any]
attributes: dict[str, list[str]]


@dataclass
class GeometryInput:
"""Validated geometry input for OSM API requests."""

type: str
coordinates: List[Any]
coordinates: list[Any]

@classmethod
def from_input(cls, geometry: Union[Dict[str, Any], str]) -> "GeometryInput":
def from_input(cls, geometry: dict[str, Any] | str) -> "GeometryInput":
"""
Create a GeometryInput from either a dictionary or a JSON string.

Expand Down Expand Up @@ -121,7 +121,7 @@ def _is_valid_coordinate(coord):
return False
return -180 <= coord[0] <= 180 and -90 <= coord[1] <= 90

def to_dict(self) -> Dict[str, Any]:
def to_dict(self) -> dict[str, Any]:
"""Convert to dictionary representation."""
return {"type": self.type, "coordinates": self.coordinates}

Expand All @@ -134,7 +134,7 @@ class RequestParams:
output_type: str = "geojson"
bind_zip: bool = True
filters: Optional[FilterDict] = None
geometry_type: Optional[List[str]] = None
geometry_type: Optional[list[str]] = None

VALID_OUTPUT_TYPES = [
"geojson",
Expand Down Expand Up @@ -189,7 +189,7 @@ def from_kwargs(cls, **kwargs) -> "RequestParams":

return instance

def to_api_params(self) -> Dict[str, Any]:
def to_api_params(self) -> dict[str, Any]:
"""Convert to API parameter dictionary."""
# Convert to camelCase for API
params = {
Expand All @@ -209,7 +209,11 @@ def to_api_params(self) -> Dict[str, Any]:
@staticmethod
def validate_bind_zip_compatibility(output_type, bind_zip):
"""Validate if the output format is compatible with bindZip=False"""
streaming_compatible_formats = ["geojson", "cog"] # Cloud Optimized GeoTIFF
streaming_compatible_formats = [
"geojson",
"cog",
"fgb",
] # Cloud Optimized GeoTIFF, FlatGeoBuf

if not bind_zip and output_type.lower() not in streaming_compatible_formats:
log.warning(
Expand All @@ -231,11 +235,11 @@ class RawDataApiMetadata:
file_name: str
download_url: str
is_zipped: bool
bbox: Optional[Tuple[float, float, float, float]] = None
bbox: Optional[tuple[float, float, float, float]] = None

@classmethod
def from_api_result(
cls, result: Dict[str, Any], params: RequestParams
cls, result: dict[str, Any], params: RequestParams
) -> "RawDataApiMetadata":
"""
Create a RawDataApiMetadata from API result and request parameters.
Expand Down Expand Up @@ -333,6 +337,7 @@ def default(cls) -> "RawDataClientConfig":
class RawDataOutputOptions:
"""Options for controlling how output data is handled."""

download_file: bool = True
auto_extract: AutoExtractOption = AutoExtractOption.automatic

@classmethod
Expand Down
19 changes: 16 additions & 3 deletions osm_data_client/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import zipfile
import json
from typing import Optional, List
from typing import Optional

from .models import RawDataApiMetadata, AutoExtractOption, RawDataOutputOptions
from .exceptions import DownloadError
Expand All @@ -18,24 +18,36 @@ class RawDataResult:

Attributes:
path: Path to the final processed file or directory
data: dict representation of data
metadata: Original metadata from the API response
extracted: Whether the file was extracted from an archive
original_path: Path to the original downloaded file (if different from path)
extracted_files: List of files that were extracted (if applicable)
"""

path: Path
metadata: RawDataApiMetadata
path: Optional[Path] = None
data: Optional[dict] = None
extracted: bool = False
original_path: Optional[Path] = None
extracted_files: Optional[List[Path]] = None
extracted_files: Optional[list[Path]] = None

def exists(self) -> bool:
"""Check if the result file or directory exists."""
if not self.path:
return False
return self.path.exists()

def suffix(self) -> str:
"""Get file type suffix, if path exists."""
if not self.path:
return ""
return self.path.suffix

def __str__(self) -> str:
"""Return string representation of the result."""
if not self.path:
return ""
return str(self.path)


Expand Down Expand Up @@ -308,6 +320,7 @@ async def _extract_with_stream_unzip(
extract_dir.mkdir(parents=True, exist_ok=True)
output_base = extract_dir
else:
extract_dir = "."
output_base = output_directory

# Prepare a file list to track what we extract
Expand Down
4 changes: 2 additions & 2 deletions osm_data_client/utils/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@

import json
import logging
from typing import Dict, Any
from typing import Any
from pathlib import Path

log = logging.getLogger(__name__)


def save_to_geojson(data: Dict[str, Any], file_path: str) -> Path:
def save_to_geojson(data: dict[str, Any], file_path: str) -> Path:
"""
Save GeoJSON data to a file.

Expand Down
4 changes: 2 additions & 2 deletions osm_data_client/utils/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
"""

import logging
from typing import Dict, Any
from typing import Any

log = logging.getLogger(__name__)


def bbox_to_polygon(
min_x: float, min_y: float, max_x: float, max_y: float
) -> Dict[str, Any]:
) -> dict[str, Any]:
"""
Convert a bounding box to a GeoJSON polygon.

Expand Down
15 changes: 15 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,18 @@ dev = [
"mercantile>=1.2.1",
"shapely>=2.1.0",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["osm_data_client"]

[tool.pytest.ini_options]
addopts = "-ra -q"
testpaths = [
"tests",
]
asyncio_mode="auto"
asyncio_default_fixture_loop_scope="session"
9 changes: 5 additions & 4 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,16 @@ API and file system.
Run all integration tests:

```bash
python tests/run_tests.py
uv sync --group test
uv run python tests/run_tests.py
```

### Skipping API Tests

If you want to skip tests that make actual API calls:

```bash
SKIP_API_TESTS=1 python tests/run_tests.py
SKIP_API_TESTS=1 uv run python tests/run_tests.py
```

### Running Specific Tests
Expand All @@ -28,10 +29,10 @@ To run specific tests, use the `TEST_PATTERN` environment variable:

```bash
# Run only CLI help test
TEST_PATTERN="test_cli.py::TestCliIntegration::test_cli_help" python tests/run_tests.py
TEST_PATTERN="test_cli.py::TestCliIntegration::test_cli_help" uv run python tests/run_tests.py

# Run all CLI tests
TEST_PATTERN="test_cli.py" python tests/run_tests.py
TEST_PATTERN="test_cli.py" uv run python tests/run_tests.py
```

## Test Data
Expand Down
Loading