diff --git a/CHANGES.md b/CHANGES.md
index 143932d..5df9d41 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,7 @@
+## 0.16.0 (2025-04-30)
+
+* new `Range` request parsing logic to make sure it works with both S3 and HTTPS files
+
 ## 0.15.0 (2025-02-27)
 
 * add support for `VSIFile` backend (https://github.com/developmentseed/tilebench/pull/27)
diff --git a/tests/test_tilebench.py b/tests/test_tilebench.py
new file mode 100644
index 0000000..5c022d7
--- /dev/null
+++ b/tests/test_tilebench.py
@@ -0,0 +1,50 @@
+"""Test profiler with S3 and HTTPS files.
+
+NOTE: when not using GDAL>=3.10, the number of GET/HEAD requests might not be right
+see: https://github.com/vincentsarago/vsifile/issues/13#issuecomment-2683310594
+
+"""
+
+import pytest
+from rio_tiler.io import Reader
+
+from tilebench import profile as profiler
+
+
+@pytest.mark.parametrize(
+    "src_path,head,get",
+    [
+        (
+            "s3://sentinel-cogs/sentinel-s2-l2a-cogs/15/T/VK/2023/10/S2B_15TVK_20231008_0_L2A/TCI.tif",
+            0,
+            3,
+        ),
+        (
+            "https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/15/T/VK/2023/10/S2B_15TVK_20231008_0_L2A/TCI.tif",
+            1,
+            3,
+        ),
+    ],
+)
+@pytest.mark.xfail
+def test_profiler(src_path, head, get):
+    """Test profiler."""
+    config = {
+        "AWS_NO_SIGN_REQUEST": True,
+        "AWS_DEFAULT_REGION": "us-west-2",
+        "GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR",
+    }
+
+    @profiler(
+        quiet=True,
+        add_to_return=True,
+        config=config,
+    )
+    def _read_tile(src_path: str, x: int, y: int, z: int, tilesize: int = 256):
+        with Reader(src_path) as cog:
+            return cog.tile(x, y, z, tilesize=tilesize)
+
+    (_, _), stats = _read_tile(src_path, 121, 185, 9)
+    assert stats["HEAD"]["count"] == head
+    assert stats["GET"]["count"] == get
+    assert stats["GET"]["bytes"] == 386677
diff --git a/tilebench/__init__.py b/tilebench/__init__.py
index 2be9250..1629b25 100644
--- a/tilebench/__init__.py
+++ b/tilebench/__init__.py
@@ -30,9 +30,18 @@ def parse_rasterio_io_logs(logs: List[str]) -> Dict[str, Any]:
 
     # GET
     all_get_requests = len([line for line in logs if "CURL_INFO_HEADER_OUT: GET" in line])
-    get_requests = [line for line in logs if ": Downloading" in line]
+    get_requests = [
+        line for line in logs if "CURL_INFO_HEADER_IN: Content-Range: bytes" in line
+    ]
     get_values = [
-        list(map(int, get.split(" Downloading ")[1].split(" ")[0].split("-")))
+        list(
+            map(
+                int,
+                get.split("CURL_INFO_HEADER_IN: Content-Range: bytes ")[1]
+                .split("/")[0]
+                .split("-"),
+            )
+        )
         for get in get_requests
     ]
     get_values_str = [f"{start}-{end}" for (start, end) in get_values]
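
For context, here is a minimal standalone sketch of the `Content-Range` parsing introduced in `tilebench/__init__.py` above. The `parse_get_ranges` helper name and the sample log lines are illustrative only (not part of the library or captured from a real GDAL/CURL session):

```python
from typing import List, Tuple


def parse_get_ranges(logs: List[str]) -> List[Tuple[int, int]]:
    """Extract (start, end) byte ranges from CURL debug log lines.

    Mirrors the parsing added in parse_rasterio_io_logs: every response
    header line of the form "Content-Range: bytes start-end/total"
    corresponds to one ranged GET request.
    """
    marker = "CURL_INFO_HEADER_IN: Content-Range: bytes "
    ranges = []
    for line in logs:
        if marker in line:
            # e.g. "0-16383/386677" -> keep "0-16383", split into ints
            span = line.split(marker)[1].split("/")[0]
            start, end = map(int, span.split("-"))
            ranges.append((start, end))
    return ranges


if __name__ == "__main__":
    sample = [
        "CURL_INFO_HEADER_IN: Content-Range: bytes 0-16383/386677",
        "CURL_INFO_HEADER_IN: Content-Range: bytes 16384-32767/386677",
    ]
    print(parse_get_ranges(sample))  # [(0, 16383), (16384, 32767)]
```

Keying on the `Content-Range` response header reported by CURL, rather than the previous `": Downloading"` debug message, is what (per the changelog entry) lets the parser handle both the S3 and HTTPS paths exercised by the new test.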