From a0547a754d58395501fff314f88a48bc0deeb89e Mon Sep 17 00:00:00 2001 From: sujanadh Date: Fri, 20 Jun 2025 18:18:54 +0545 Subject: [PATCH 1/9] fix(config): add stream parameter to check if download is required or not --- osm_data_client/client.py | 21 ++++++++++++++++----- osm_data_client/models.py | 7 ++++--- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/osm_data_client/client.py b/osm_data_client/client.py index e933739..921eed0 100644 --- a/osm_data_client/client.py +++ b/osm_data_client/client.py @@ -1,7 +1,7 @@ import logging import asyncio import json -from typing import Dict, Any, Union +from typing import Dict, Any, Union,Optional from aiohttp import ClientSession, ClientResponseError from .models import ( @@ -307,12 +307,19 @@ async def get_osm_data( # Create metadata from the result metadata = RawDataApiMetadata.from_api_result(result, params) log.debug("Data metadata: %s", metadata) - + + # Avoid downloading + if self.config.stream: + return result.get("result") + # Download the data return await self.api.download_to_disk(metadata, output_options) - -async def get_osm_data(geometry: Union[Dict[str, Any], str], **kwargs) -> RawDataResult: +async def get_osm_data( + geometry: Union[Dict[str, Any], str], + stream: Optional[bool] = None, + **kwargs +) -> RawDataResult: """ Get OSM data for a specified area. 
@@ -324,6 +331,7 @@ async def get_osm_data(geometry: Union[Dict[str, Any], str], **kwargs) -> RawDat - fileName: Name for the export file (default: "osm_export") - outputType: Format of the output (default: "geojson") - bindZip: Whether to retrieve results as a zip file (default: False) + - stream: Boolean whether to stream data url or not (default: False) - filters: Dictionary of filters to apply - geometryType: List of geometry types to include @@ -336,5 +344,8 @@ async def get_osm_data(geometry: Union[Dict[str, Any], str], **kwargs) -> RawDat TaskPollingError: If polling the task status fails DownloadError: If downloading data fails """ - client = RawDataClient() + config = RawDataClientConfig.default() + if stream: + config.stream = stream + client = RawDataClient(config=config) return await client.get_osm_data(geometry, **kwargs) diff --git a/osm_data_client/models.py b/osm_data_client/models.py index c43406b..8ec4454 100644 --- a/osm_data_client/models.py +++ b/osm_data_client/models.py @@ -209,8 +209,8 @@ def to_api_params(self) -> Dict[str, Any]: @staticmethod def validate_bind_zip_compatibility(output_type, bind_zip): """Validate if the output format is compatible with bindZip=False""" - streaming_compatible_formats = ["geojson", "cog"] # Cloud Optimized GeoTIFF - + streaming_compatible_formats = ["geojson", "cog", "fgb"] # Cloud Optimized GeoTIFF, FlatGeoBuf + if not bind_zip and output_type.lower() not in streaming_compatible_formats: log.warning( f"Format '{output_type}' requires ZIP packaging. 
" @@ -317,7 +317,8 @@ class RawDataClientConfig: memory_threshold_mb: int = 50 base_api_url: str = "https://api-prod.raw-data.hotosm.org/v1" output_directory: Path = Path.cwd() - + stream: bool = False + @property def memory_threshold_bytes(self) -> int: """Convert memory threshold to bytes.""" From e77476e5c6387ead15aefa76a139b713b7631139 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 20 Jun 2025 12:36:57 +0000 Subject: [PATCH 2/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- osm_data_client/client.py | 11 +++++------ osm_data_client/models.py | 10 +++++++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/osm_data_client/client.py b/osm_data_client/client.py index 921eed0..fedfb01 100644 --- a/osm_data_client/client.py +++ b/osm_data_client/client.py @@ -1,7 +1,7 @@ import logging import asyncio import json -from typing import Dict, Any, Union,Optional +from typing import Dict, Any, Union, Optional from aiohttp import ClientSession, ClientResponseError from .models import ( @@ -307,18 +307,17 @@ async def get_osm_data( # Create metadata from the result metadata = RawDataApiMetadata.from_api_result(result, params) log.debug("Data metadata: %s", metadata) - + # Avoid downloading if self.config.stream: return result.get("result") - + # Download the data return await self.api.download_to_disk(metadata, output_options) + async def get_osm_data( - geometry: Union[Dict[str, Any], str], - stream: Optional[bool] = None, - **kwargs + geometry: Union[Dict[str, Any], str], stream: Optional[bool] = None, **kwargs ) -> RawDataResult: """ Get OSM data for a specified area. 
diff --git a/osm_data_client/models.py b/osm_data_client/models.py index 8ec4454..9c78800 100644 --- a/osm_data_client/models.py +++ b/osm_data_client/models.py @@ -209,8 +209,12 @@ def to_api_params(self) -> Dict[str, Any]: @staticmethod def validate_bind_zip_compatibility(output_type, bind_zip): """Validate if the output format is compatible with bindZip=False""" - streaming_compatible_formats = ["geojson", "cog", "fgb"] # Cloud Optimized GeoTIFF, FlatGeoBuf - + streaming_compatible_formats = [ + "geojson", + "cog", + "fgb", + ] # Cloud Optimized GeoTIFF, FlatGeoBuf + if not bind_zip and output_type.lower() not in streaming_compatible_formats: log.warning( f"Format '{output_type}' requires ZIP packaging. " @@ -318,7 +322,7 @@ class RawDataClientConfig: base_api_url: str = "https://api-prod.raw-data.hotosm.org/v1" output_directory: Path = Path.cwd() stream: bool = False - + @property def memory_threshold_bytes(self) -> int: """Convert memory threshold to bytes.""" From 5c4b612af960040d421f6d2e4896ae9134411077 Mon Sep 17 00:00:00 2001 From: spwoodcock Date: Fri, 20 Jun 2025 16:36:10 +0100 Subject: [PATCH 3/9] build: add pkg build config, plus pytest config --- pyproject.toml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 881bd41..ed50e52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,3 +20,18 @@ dev = [ "mercantile>=1.2.1", "shapely>=2.1.0", ] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["osm_data_client"] + +[tool.pytest.ini_options] +addopts = "-ra -q" +testpaths = [ + "tests", +] +asyncio_mode="auto" +asyncio_default_fixture_loop_scope="session" From 02ee45c90b2554f1f96e60061a81a382319db380 Mon Sep 17 00:00:00 2001 From: spwoodcock Date: Fri, 20 Jun 2025 16:36:52 +0100 Subject: [PATCH 4/9] fix: get stream param from kwargs --- osm_data_client/client.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) 
diff --git a/osm_data_client/client.py b/osm_data_client/client.py index fedfb01..098f865 100644 --- a/osm_data_client/client.py +++ b/osm_data_client/client.py @@ -316,9 +316,7 @@ async def get_osm_data( return await self.api.download_to_disk(metadata, output_options) -async def get_osm_data( - geometry: Union[Dict[str, Any], str], stream: Optional[bool] = None, **kwargs -) -> RawDataResult: +async def get_osm_data(geometry: dict[str, Any] | str, **kwargs) -> RawDataResult: """ Get OSM data for a specified area. @@ -330,9 +328,9 @@ async def get_osm_data( - fileName: Name for the export file (default: "osm_export") - outputType: Format of the output (default: "geojson") - bindZip: Whether to retrieve results as a zip file (default: False) - - stream: Boolean whether to stream data url or not (default: False) - filters: Dictionary of filters to apply - geometryType: List of geometry types to include + - stream: Boolean whether to stream data url or not (default: False) Returns: Path to the downloaded data file or directory @@ -344,7 +342,9 @@ async def get_osm_data( DownloadError: If downloading data fails """ config = RawDataClientConfig.default() - if stream: + + if (stream := kwargs.pop("stream", False)): config.stream = stream + client = RawDataClient(config=config) return await client.get_osm_data(geometry, **kwargs) From a021be2b1306cc2a4ff93b73aac9fa616cdf7438 Mon Sep 17 00:00:00 2001 From: spwoodcock Date: Fri, 20 Jun 2025 16:37:07 +0100 Subject: [PATCH 5/9] refactor: replace typing imports with direct python types --- README.md | 4 ++-- osm_data_client/client.py | 8 ++++---- osm_data_client/models.py | 20 ++++++++++---------- osm_data_client/processing.py | 4 ++-- osm_data_client/utils/file.py | 4 ++-- osm_data_client/utils/geometry.py | 4 ++-- tests/README.md | 9 +++++---- uv.lock | 5 ++--- 8 files changed, 29 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 5ce62af..1225d54 100644 --- a/README.md +++ b/README.md @@ -178,7 +178,7 @@ 
except APIRequestError as e: ```python async def get_osm_data( - geometry: Union[Dict[str, Any], str], + geometry: dict[str, Any] | str, **kwargs ) -> RawDataResult ``` @@ -189,7 +189,7 @@ async def get_osm_data( class RawDataClient: async def get_osm_data( self, - geometry: Union[Dict[str, Any], str], + geometry: dict[str, Any] | str, output_options: RawDataOutputOptions = RawDataOutputOptions.default(), **kwargs ) -> RawDataResult diff --git a/osm_data_client/client.py b/osm_data_client/client.py index 098f865..cbccaae 100644 --- a/osm_data_client/client.py +++ b/osm_data_client/client.py @@ -1,7 +1,7 @@ import logging import asyncio import json -from typing import Dict, Any, Union, Optional +from typing import Any from aiohttp import ClientSession, ClientResponseError from .models import ( @@ -41,7 +41,7 @@ def __init__(self, config: RawDataClientConfig = RawDataClientConfig.default()): async def request_snapshot( self, geometry: GeometryInput, params: RequestParams - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """ Request a snapshot of OSM data. @@ -95,7 +95,7 @@ async def request_snapshot( async def poll_task_status( self, task_link: str, polling_interval: int = 2 - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """ Poll the API to check task status until completion. 
@@ -247,7 +247,7 @@ def __init__(self, config: RawDataClientConfig = RawDataClientConfig.default()): async def get_osm_data( self, - geometry: Union[Dict[str, Any], str], + geometry: dict[str, Any] | str, output_options: RawDataOutputOptions = RawDataOutputOptions.default(), **kwargs, ) -> RawDataResult: diff --git a/osm_data_client/models.py b/osm_data_client/models.py index 9c78800..71c4c84 100644 --- a/osm_data_client/models.py +++ b/osm_data_client/models.py @@ -1,6 +1,6 @@ import logging import json -from typing import Dict, Any, Union, Optional, List, TypedDict, Tuple +from typing import Any, Optional, TypedDict from dataclasses import dataclass from enum import Enum, auto from pathlib import Path @@ -11,8 +11,8 @@ class FilterDict(TypedDict, total=False): """TypedDict for filter specifications.""" - tags: Dict[str, Any] - attributes: Dict[str, List[str]] + tags: dict[str, Any] + attributes: dict[str, list[str]] @dataclass @@ -20,10 +20,10 @@ class GeometryInput: """Validated geometry input for OSM API requests.""" type: str - coordinates: List[Any] + coordinates: list[Any] @classmethod - def from_input(cls, geometry: Union[Dict[str, Any], str]) -> "GeometryInput": + def from_input(cls, geometry: dict[str, Any] | str) -> "GeometryInput": """ Create a GeometryInput from either a dictionary or a JSON string. 
@@ -121,7 +121,7 @@ def _is_valid_coordinate(coord): return False return -180 <= coord[0] <= 180 and -90 <= coord[1] <= 90 - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """Convert to dictionary representation.""" return {"type": self.type, "coordinates": self.coordinates} @@ -134,7 +134,7 @@ class RequestParams: output_type: str = "geojson" bind_zip: bool = True filters: Optional[FilterDict] = None - geometry_type: Optional[List[str]] = None + geometry_type: Optional[list[str]] = None VALID_OUTPUT_TYPES = [ "geojson", @@ -189,7 +189,7 @@ def from_kwargs(cls, **kwargs) -> "RequestParams": return instance - def to_api_params(self) -> Dict[str, Any]: + def to_api_params(self) -> dict[str, Any]: """Convert to API parameter dictionary.""" # Convert to camelCase for API params = { @@ -235,11 +235,11 @@ class RawDataApiMetadata: file_name: str download_url: str is_zipped: bool - bbox: Optional[Tuple[float, float, float, float]] = None + bbox: Optional[tuple[float, float, float, float]] = None @classmethod def from_api_result( - cls, result: Dict[str, Any], params: RequestParams + cls, result: dict[str, Any], params: RequestParams ) -> "RawDataApiMetadata": """ Create a RawDataApiMetadata from API result and request parameters. 
diff --git a/osm_data_client/processing.py b/osm_data_client/processing.py index e356db9..2ff1354 100644 --- a/osm_data_client/processing.py +++ b/osm_data_client/processing.py @@ -3,7 +3,7 @@ import logging import zipfile import json -from typing import Optional, List +from typing import Optional from .models import RawDataApiMetadata, AutoExtractOption, RawDataOutputOptions from .exceptions import DownloadError @@ -28,7 +28,7 @@ class RawDataResult: metadata: RawDataApiMetadata extracted: bool = False original_path: Optional[Path] = None - extracted_files: Optional[List[Path]] = None + extracted_files: Optional[list[Path]] = None def exists(self) -> bool: """Check if the result file or directory exists.""" diff --git a/osm_data_client/utils/file.py b/osm_data_client/utils/file.py index 5b078bd..beb63a3 100644 --- a/osm_data_client/utils/file.py +++ b/osm_data_client/utils/file.py @@ -6,13 +6,13 @@ import json import logging -from typing import Dict, Any +from typing import Any from pathlib import Path log = logging.getLogger(__name__) -def save_to_geojson(data: Dict[str, Any], file_path: str) -> Path: +def save_to_geojson(data: dict[str, Any], file_path: str) -> Path: """ Save GeoJSON data to a file. diff --git a/osm_data_client/utils/geometry.py b/osm_data_client/utils/geometry.py index cab9c31..5c03cfc 100644 --- a/osm_data_client/utils/geometry.py +++ b/osm_data_client/utils/geometry.py @@ -5,14 +5,14 @@ """ import logging -from typing import Dict, Any +from typing import Any log = logging.getLogger(__name__) def bbox_to_polygon( min_x: float, min_y: float, max_x: float, max_y: float -) -> Dict[str, Any]: +) -> dict[str, Any]: """ Convert a bounding box to a GeoJSON polygon. diff --git a/tests/README.md b/tests/README.md index 1f46866..8450e40 100644 --- a/tests/README.md +++ b/tests/README.md @@ -11,7 +11,8 @@ API and file system. 
Run all integration tests: ```bash -python tests/run_tests.py +uv sync --group test +uv run python tests/run_tests.py ``` ### Skipping API Tests @@ -19,7 +20,7 @@ python tests/run_tests.py If you want to skip tests that make actual API calls: ```bash -SKIP_API_TESTS=1 python tests/run_tests.py +SKIP_API_TESTS=1 uv run python tests/run_tests.py ``` ### Running Specific Tests @@ -28,10 +29,10 @@ To run specific tests, use the `TEST_PATTERN` environment variable: ```bash # Run only CLI help test -TEST_PATTERN="test_cli.py::TestCliIntegration::test_cli_help" python tests/run_tests.py +TEST_PATTERN="test_cli.py::TestCliIntegration::test_cli_help" uv run python tests/run_tests.py # Run all CLI tests -TEST_PATTERN="test_cli.py" python tests/run_tests.py +TEST_PATTERN="test_cli.py" uv run python tests/run_tests.py ``` ## Test Data diff --git a/uv.lock b/uv.lock index cff43cd..68cde5f 100644 --- a/uv.lock +++ b/uv.lock @@ -1,4 +1,5 @@ version = 1 +revision = 1 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.12'", @@ -611,8 +612,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b8/2a/25e0be2b509c28375c7f75c7e8d8d060773f2cce4856a1654276e3202339/pycryptodome-3.22.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d21c1eda2f42211f18a25db4eaf8056c94a8563cd39da3683f89fe0d881fb772", size = 2262255 }, { url = "https://files.pythonhosted.org/packages/41/58/60917bc4bbd91712e53ce04daf237a74a0ad731383a01288130672994328/pycryptodome-3.22.0-cp37-abi3-win32.whl", hash = "sha256:f02baa9f5e35934c6e8dcec91fcde96612bdefef6e442813b8ea34e82c84bbfb", size = 1763403 }, { url = "https://files.pythonhosted.org/packages/55/f4/244c621afcf7867e23f63cfd7a9630f14cfe946c9be7e566af6c3915bcde/pycryptodome-3.22.0-cp37-abi3-win_amd64.whl", hash = "sha256:d086aed307e96d40c23c42418cbbca22ecc0ab4a8a0e24f87932eeab26c08627", size = 1794568 }, - { url = 
"https://files.pythonhosted.org/packages/cd/13/16d3a83b07f949a686f6cfd7cfc60e57a769ff502151ea140ad67b118e26/pycryptodome-3.22.0-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:98fd9da809d5675f3a65dcd9ed384b9dc67edab6a4cda150c5870a8122ec961d", size = 1700779 }, - { url = "https://files.pythonhosted.org/packages/13/af/16d26f7dfc5fd7696ea2c91448f937b51b55312b5bed44f777563e32a4fe/pycryptodome-3.22.0-pp27-pypy_73-win32.whl", hash = "sha256:37ddcd18284e6b36b0a71ea495a4c4dca35bb09ccc9bfd5b91bfaf2321f131c1", size = 1775230 }, { url = "https://files.pythonhosted.org/packages/37/c3/e3423e72669ca09f141aae493e1feaa8b8475859898b04f57078280a61c4/pycryptodome-3.22.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b4bdce34af16c1dcc7f8c66185684be15f5818afd2a82b75a4ce6b55f9783e13", size = 1618698 }, { url = "https://files.pythonhosted.org/packages/f9/b7/35eec0b3919cafea362dcb68bb0654d9cb3cde6da6b7a9d8480ce0bf203a/pycryptodome-3.22.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2988ffcd5137dc2d27eb51cd18c0f0f68e5b009d5fec56fbccb638f90934f333", size = 1666957 }, { url = "https://files.pythonhosted.org/packages/b0/1f/f49bccdd8d61f1da4278eb0d6aee7f988f1a6ec4056b0c2dc51eda45ae27/pycryptodome-3.22.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e653519dedcd1532788547f00eeb6108cc7ce9efdf5cc9996abce0d53f95d5a9", size = 1659242 }, @@ -753,7 +752,7 @@ wheels = [ [[package]] name = "raw-data-api-py" version = "0.1.0" -source = { virtual = "." } +source = { editable = "." 
} dependencies = [ { name = "aiofiles" }, { name = "aiohttp" }, From 0ff7dc68fabf63895289723eb38bbb43801ce89b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 20 Jun 2025 15:37:17 +0000 Subject: [PATCH 6/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- osm_data_client/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osm_data_client/client.py b/osm_data_client/client.py index cbccaae..8b5c879 100644 --- a/osm_data_client/client.py +++ b/osm_data_client/client.py @@ -343,7 +343,7 @@ async def get_osm_data(geometry: dict[str, Any] | str, **kwargs) -> RawDataResul """ config = RawDataClientConfig.default() - if (stream := kwargs.pop("stream", False)): + if stream := kwargs.pop("stream", False): config.stream = stream client = RawDataClient(config=config) From 8672a397b3b6dce03829e9c87599bb40796ba6f1 Mon Sep 17 00:00:00 2001 From: spwoodcock Date: Fri, 20 Jun 2025 17:52:42 +0100 Subject: [PATCH 7/9] refactor: add download_data to RawDataOutputOptions to allow stream --- osm_data_client/client.py | 31 ++++++++++++----------- osm_data_client/models.py | 2 +- osm_data_client/processing.py | 15 ++++++++++- tests/test_api.py | 47 +++++++++++++++++++++++++---------- 4 files changed, 66 insertions(+), 29 deletions(-) diff --git a/osm_data_client/client.py b/osm_data_client/client.py index cbccaae..3d9c499 100644 --- a/osm_data_client/client.py +++ b/osm_data_client/client.py @@ -265,7 +265,7 @@ async def get_osm_data( - geometryType: List of geometry types to include Returns: - Path to the downloaded data file or directory + Object containing metadata, plus a filepath or data. 
Raises: ValidationError: If inputs are invalid @@ -308,15 +308,22 @@ async def get_osm_data( metadata = RawDataApiMetadata.from_api_result(result, params) log.debug("Data metadata: %s", metadata) - # Avoid downloading - if self.config.stream: - return result.get("result") + if output_options.download_file: + # Download the data + return await self.api.download_to_disk(metadata, output_options) - # Download the data - return await self.api.download_to_disk(metadata, output_options) + # Skip download and return directly + return RawDataResult( + metadata=metadata, + data=result.get("result", {}) + ) -async def get_osm_data(geometry: dict[str, Any] | str, **kwargs) -> RawDataResult: +async def get_osm_data( + geometry: dict[str, Any] | str, + output_options: RawDataOutputOptions = RawDataOutputOptions.default(), + **kwargs, +) -> RawDataResult: """ Get OSM data for a specified area. @@ -324,16 +331,16 @@ async def get_osm_data(geometry: dict[str, Any] | str, **kwargs) -> RawDataResul Args: geometry: GeoJSON geometry object or string + output_options: Options for controlling output behavior **kwargs: Additional parameters for customizing the request - fileName: Name for the export file (default: "osm_export") - outputType: Format of the output (default: "geojson") - bindZip: Whether to retrieve results as a zip file (default: False) - filters: Dictionary of filters to apply - geometryType: List of geometry types to include - - stream: Boolean whether to stream data url or not (default: False) Returns: - Path to the downloaded data file or directory + Object containing metadata, plus a filepath or data. 
Raises: ValidationError: If inputs are invalid @@ -342,9 +349,5 @@ async def get_osm_data(geometry: dict[str, Any] | str, **kwargs) -> RawDataResul DownloadError: If downloading data fails """ config = RawDataClientConfig.default() - - if (stream := kwargs.pop("stream", False)): - config.stream = stream - client = RawDataClient(config=config) - return await client.get_osm_data(geometry, **kwargs) + return await client.get_osm_data(geometry, output_options, **kwargs) diff --git a/osm_data_client/models.py b/osm_data_client/models.py index 71c4c84..de79e8b 100644 --- a/osm_data_client/models.py +++ b/osm_data_client/models.py @@ -321,7 +321,6 @@ class RawDataClientConfig: memory_threshold_mb: int = 50 base_api_url: str = "https://api-prod.raw-data.hotosm.org/v1" output_directory: Path = Path.cwd() - stream: bool = False @property def memory_threshold_bytes(self) -> int: @@ -338,6 +337,7 @@ def default(cls) -> "RawDataClientConfig": class RawDataOutputOptions: """Options for controlling how output data is handled.""" + download_file: bool = True auto_extract: AutoExtractOption = AutoExtractOption.automatic @classmethod diff --git a/osm_data_client/processing.py b/osm_data_client/processing.py index 2ff1354..f1101ac 100644 --- a/osm_data_client/processing.py +++ b/osm_data_client/processing.py @@ -18,24 +18,36 @@ class RawDataResult: Attributes: path: Path to the final processed file or directory + data: dict representation of data metadata: Original metadata from the API response extracted: Whether the file was extracted from an archive original_path: Path to the original downloaded file (if different from path) extracted_files: List of files that were extracted (if applicable) """ - path: Path metadata: RawDataApiMetadata + path: Optional[Path] = None + data: Optional[dict] = None extracted: bool = False original_path: Optional[Path] = None extracted_files: Optional[list[Path]] = None def exists(self) -> bool: """Check if the result file or directory exists.""" + if 
not self.path: + return False return self.path.exists() + def suffix(self) -> str: + """Get file type suffix, if path exists.""" + if not self.path: + return "" + return self.path.suffix + def __str__(self) -> str: """Return string representation of the result.""" + if not self.path: + return "" return str(self.path) @@ -308,6 +320,7 @@ async def _extract_with_stream_unzip( extract_dir.mkdir(parents=True, exist_ok=True) output_base = extract_dir else: + extract_dir = "." output_base = output_directory # Prepare a file list to track what we extract diff --git a/tests/test_api.py b/tests/test_api.py index e8d6c6d..232d543 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -68,7 +68,7 @@ def cleanup_files(self): # Clean up after test for file_path in created_files: - if file_path.exists(): + if file_path and file_path.exists(): if file_path.is_dir(): shutil.rmtree(file_path) else: @@ -93,7 +93,7 @@ async def test_basic_building_download(self, small_geometry, cleanup_files): assert result.exists(), f"Result file {result.path} does not exist" # Check if we got an actual file with content - if result.path.is_file(): + if result.path and result.path.is_file(): file_size = result.path.stat().st_size assert file_size > 0, f"Result file {result.path} is empty (0 bytes)" print(f"Downloaded file size: {file_size} bytes") @@ -104,7 +104,7 @@ async def test_basic_building_download(self, small_geometry, cleanup_files): @pytest.mark.asyncio async def test_auto_extract_options(self, small_geometry, cleanup_files): """Test the different auto-extract options.""" - # Test keeping as zip (force_zip) + # 1. 
Test keeping as zip (force_zip) output_options = RawDataOutputOptions(auto_extract=AutoExtractOption.force_zip) params = { @@ -123,14 +123,14 @@ async def test_auto_extract_options(self, small_geometry, cleanup_files): ) cleanup_files.append(first_result.path) - assert first_result.path.exists(), ( + assert first_result.exists(), ( f"Result file {first_result.path} does not exist" ) - assert first_result.path.suffix == ".zip", ( - f"Expected ZIP file, got {first_result.path.suffix}" + assert first_result.suffix() == ".zip", ( + f"Expected ZIP file, got path {first_result.path}" ) - # Test forcing extraction (force_extract) + # 2. Test forcing extraction (force_extract) output_options = RawDataOutputOptions( auto_extract=AutoExtractOption.force_extract ) @@ -143,16 +143,37 @@ async def test_auto_extract_options(self, small_geometry, cleanup_files): ) cleanup_files.append(second_result.path) - assert second_result.path.exists(), ( + assert second_result.exists(), ( f"Result file {second_result.path} does not exist" ) # Check if we got the expected file type (not a zip) if second_result.extracted: - assert second_result.path.suffix != ".zip", ( + assert second_result.suffix() != ".zip", ( "Expected extracted file, got ZIP file" ) + # 3. 
Test streaming result + output_options = RawDataOutputOptions( + download_file=False + ) + + # Create a new file name to avoid conflicts + params["fileName"] = "test_stream_data" + + third_result = await client.get_osm_data( + small_geometry, output_options, **params + ) + cleanup_files.append(third_result.path) + + assert not third_result.exists(), ( + f"Result file {second_result.path} was downloaded, but shouldn't have been" + ) + + # Check the data was assigned to property + assert isinstance(third_result.data, dict) + assert len(third_result.data) > 0 + @pytest.mark.asyncio async def test_different_formats(self, small_geometry, cleanup_files): """Test fetching data in different output formats.""" @@ -172,12 +193,12 @@ async def test_different_formats(self, small_geometry, cleanup_files): cleanup_files.append(result.path) # Verify the result exists with helpful messages - assert result.path.exists(), ( + assert result.exists(), ( f"Result file for {format_type} format does not exist" ) # Check file size to ensure we got actual content - if result.path.is_file(): + if result.path and result.path.is_file(): file_size = result.path.stat().st_size assert file_size > 0, ( f"Result file for {format_type} format is empty (0 bytes)" @@ -204,10 +225,10 @@ async def test_with_api_config(self, small_geometry, cleanup_files): cleanup_files.append(result.path) # Verify the result exists with helpful messages - assert result.path.exists(), f"Result file {result.path} does not exist" + assert result.exists(), f"Result file {result.path} does not exist" # Check file size - if result.path.is_file(): + if result.path and result.path.is_file(): file_size = result.path.stat().st_size assert file_size > 0, "Result file is empty (0 bytes)" print(f"Downloaded file size: {file_size} bytes") From cea04cfd1ed2208e14e841cd5525c1b0dd7a8037 Mon Sep 17 00:00:00 2001 From: spwoodcock Date: Fri, 20 Jun 2025 17:56:17 +0100 Subject: [PATCH 8/9] docs: add info about using download_file=False --- 
README.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.md b/README.md index 1225d54..cc559ee 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,24 @@ client = RawDataClient(config) result = await client.get_osm_data(geometry, **params) ``` +### Streaming Data Directly (No Download) + +```python +from osm_data_client import RawDataOutputOptions + +# Do not download the file, just return the response +options = RawDataOutputOptions(download_file=False) + +result = await client.get_osm_data(geometry, options, **{ + "outputType": "geojson", + "bindZip": False, +}) +``` + +> [!NOTE] +> This configuration is best used with the bindZip=False +> param and geojson output, as shown above. + ### Controlling File Extraction ```python From 89038b05a0c5b74d2686ecf07065af54c1e70c8f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 20 Jun 2025 16:58:13 +0000 Subject: [PATCH 9/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- osm_data_client/client.py | 11 ++++------- tests/test_api.py | 8 ++------ 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/osm_data_client/client.py b/osm_data_client/client.py index 3d9c499..e605ee0 100644 --- a/osm_data_client/client.py +++ b/osm_data_client/client.py @@ -313,16 +313,13 @@ async def get_osm_data( return await self.api.download_to_disk(metadata, output_options) # Skip download and return directly - return RawDataResult( - metadata=metadata, - data=result.get("result", {}) - ) + return RawDataResult(metadata=metadata, data=result.get("result", {})) async def get_osm_data( - geometry: dict[str, Any] | str, - output_options: RawDataOutputOptions = RawDataOutputOptions.default(), - **kwargs, + geometry: dict[str, Any] | str, + output_options: RawDataOutputOptions = RawDataOutputOptions.default(), + **kwargs, ) -> RawDataResult: """ Get OSM data for a specified area. 
diff --git a/tests/test_api.py b/tests/test_api.py index 232d543..2eb2275 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -123,9 +123,7 @@ async def test_auto_extract_options(self, small_geometry, cleanup_files): ) cleanup_files.append(first_result.path) - assert first_result.exists(), ( - f"Result file {first_result.path} does not exist" - ) + assert first_result.exists(), f"Result file {first_result.path} does not exist" assert first_result.suffix() == ".zip", ( f"Expected ZIP file, got path {first_result.path}" ) @@ -154,9 +152,7 @@ async def test_auto_extract_options(self, small_geometry, cleanup_files): ) # 3. Test streaming result - output_options = RawDataOutputOptions( - download_file=False - ) + output_options = RawDataOutputOptions(download_file=False) # Create a new file name to avoid conflicts params["fileName"] = "test_stream_data"