From c32d563cb899d45072ccb07709a6f4a53e6d97c4 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Wed, 3 Jun 2026 14:58:32 +0200 Subject: [PATCH 1/2] fix: Return all items from Redis dataset `get_data` with `desc=True` and `limit=None` The match statement fell through to the generic descending arm and built the JSON path `$[:-0]`, which slice semantics evaluate to an empty array, so a non-empty dataset silently returned zero items. --- src/crawlee/storage_clients/_redis/_dataset_client.py | 2 ++ .../_redis/test_redis_dataset_client.py | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/src/crawlee/storage_clients/_redis/_dataset_client.py b/src/crawlee/storage_clients/_redis/_dataset_client.py index f2383b47de..1a33ce609e 100644 --- a/src/crawlee/storage_clients/_redis/_dataset_client.py +++ b/src/crawlee/storage_clients/_redis/_dataset_client.py @@ -185,6 +185,8 @@ async def get_data( match (desc, offset, limit): case (True, 0, int()): json_path += f'[-{limit}:]' + case (True, 0, None): + json_path += '[:]' case (True, int(), None): json_path += f'[:-{offset}]' case (True, int(), int()): diff --git a/tests/unit/storage_clients/_redis/test_redis_dataset_client.py b/tests/unit/storage_clients/_redis/test_redis_dataset_client.py index 731f85b887..1cf9f5b163 100644 --- a/tests/unit/storage_clients/_redis/test_redis_dataset_client.py +++ b/tests/unit/storage_clients/_redis/test_redis_dataset_client.py @@ -148,6 +148,17 @@ async def test_metadata_record_updates(dataset_client: RedisDatasetClient) -> No assert metadata.accessed_at > accessed_after_get +async def test_get_data_descending_with_unlimited_limit(dataset_client: RedisDatasetClient) -> None: + """Test that get_data with desc=True and limit=None returns all items in reverse order.""" + items = [{'id': 1}, {'id': 2}, {'id': 3}] + await dataset_client.push_data(items) + + result = await dataset_client.get_data(desc=True, limit=None) + + assert result.count == 3 + assert result.items == [{'id': 3}, {'id': 2}, {'id': 1}] + + async def test_error_handling_on_push_failure(dataset_client: RedisDatasetClient) -> None: """Test that push_data properly handles Redis errors and retries.""" mock_pipe = MagicMock() From 1fcf7f0c54a8d10b2f6a5eb35f726ce1226dfc0f Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Thu, 4 Jun 2026 12:57:38 +0200 Subject: [PATCH 2/2] test: Move desc/unlimited get_data regression test to shared dataset suite --- .../_redis/test_redis_dataset_client.py | 11 ----------- tests/unit/storages/test_dataset.py | 11 +++++++++++ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/unit/storage_clients/_redis/test_redis_dataset_client.py b/tests/unit/storage_clients/_redis/test_redis_dataset_client.py index 1cf9f5b163..731f85b887 100644 --- a/tests/unit/storage_clients/_redis/test_redis_dataset_client.py +++ b/tests/unit/storage_clients/_redis/test_redis_dataset_client.py @@ -148,17 +148,6 @@ async def test_metadata_record_updates(dataset_client: RedisDatasetClient) -> No assert metadata.accessed_at > accessed_after_get -async def test_get_data_descending_with_unlimited_limit(dataset_client: RedisDatasetClient) -> None: - """Test that get_data with desc=True and limit=None returns all items in reverse order.""" - items = [{'id': 1}, {'id': 2}, {'id': 3}] - await dataset_client.push_data(items) - - result = await dataset_client.get_data(desc=True, limit=None) - - assert result.count == 3 - assert result.items == [{'id': 3}, {'id': 2}, {'id': 1}] - - async def test_error_handling_on_push_failure(dataset_client: RedisDatasetClient) -> None: """Test that push_data properly handles Redis errors and retries.""" mock_pipe = MagicMock() diff --git a/tests/unit/storages/test_dataset.py b/tests/unit/storages/test_dataset.py index a95efff7f6..3b0a8e1d6e 100644 --- a/tests/unit/storages/test_dataset.py +++ b/tests/unit/storages/test_dataset.py @@ -235,6 +235,17 @@ async def test_get_data_descending_order(dataset: Dataset) -> None: assert result.items[-1]['id'] == 1 +async def test_get_data_descending_with_unlimited_limit(dataset: Dataset) -> None: + """Test that get_data with desc=True and limit=None returns all items in reverse order.""" + items = [{'id': i} for i in range(1, 4)] # 3 items + await dataset.push_data(items) + + result = await dataset.get_data(desc=True, limit=None) + + assert result.count == 3 + assert result.items == [{'id': 3}, {'id': 2}, {'id': 1}] + + async def test_get_data_skip_empty(dataset: Dataset) -> None: """Test getting data with skip_empty option filters out empty items.""" # Add some items including an empty one