From a47887a9705d9cfbbbab9fe2cdc6ba7331c23f9b Mon Sep 17 00:00:00 2001
From: Josef Prochazka
Date: Thu, 16 Oct 2025 11:43:04 +0200
Subject: [PATCH] Update test to include statistics from before reboot.

Temporarily point to an unmerged crawlee version that contains the fix.
---
 pyproject.toml                                       |  2 +-
 tests/integration/actor_source_base/requirements.txt |  2 +-
 tests/integration/test_actor_lifecycle.py            |  6 ++++--
 uv.lock                                              | 10 +++-------
 4 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 36135563..0e3c5da0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,7 +36,7 @@ keywords = [
 dependencies = [
     "apify-client>=2.0.0,<3.0.0",
     "apify-shared>=2.0.0,<3.0.0",
-    "crawlee>=1.0.2,<2.0.0",
+    "crawlee @ git+https://github.com/apify/crawlee-python@crawler-persistance",
     "cachetools>=5.5.0",
     "cryptography>=42.0.0",
     "impit>=0.6.1",
diff --git a/tests/integration/actor_source_base/requirements.txt b/tests/integration/actor_source_base/requirements.txt
index f8dcde1b..7e7d50d1 100644
--- a/tests/integration/actor_source_base/requirements.txt
+++ b/tests/integration/actor_source_base/requirements.txt
@@ -1,4 +1,4 @@
 # The test fixture will put the Apify SDK wheel path on the next line
 APIFY_SDK_WHEEL_PLACEHOLDER
 uvicorn[standard]
-crawlee[parsel]>=1.0.0,<2.0.0
+crawlee[parsel] @ git+https://github.com/apify/crawlee-python@crawler-persistance
diff --git a/tests/integration/test_actor_lifecycle.py b/tests/integration/test_actor_lifecycle.py
index 649dc1d1..ddd367a9 100644
--- a/tests/integration/test_actor_lifecycle.py
+++ b/tests/integration/test_actor_lifecycle.py
@@ -122,7 +122,8 @@ async def main() -> None:
 async def test_actor_with_crawler_reboot(make_actor: MakeActorFunction, run_actor: RunActorFunction) -> None:
     """Test that crawler in actor works as expected after reboot.
 
-    Handle two requests. Reboot in between the two requests."""
+    Handle two requests. Reboot in between the two requests. The second run should include statistics of the first run.
+    """
 
     async def main() -> None:
         from crawlee._types import BasicCrawlingContext, ConcurrencySettings
@@ -152,7 +153,8 @@ async def default_handler(context: BasicCrawlingContext) -> None:
         await crawler.run(requests)
 
         # Each time one request is finished.
-        assert crawler.statistics.state.requests_finished == 1
+        expected_requests_finished = 1 if first_run else 2
+        assert crawler.statistics.state.requests_finished == expected_requests_finished
 
     actor = await make_actor(label='migration', main_func=main)
     run_result = await run_actor(actor)
diff --git a/uv.lock b/uv.lock
index 84827538..33696879 100644
--- a/uv.lock
+++ b/uv.lock
@@ -76,7 +76,7 @@ requires-dist = [
     { name = "apify-client", specifier = ">=2.0.0,<3.0.0" },
     { name = "apify-shared", specifier = ">=2.0.0,<3.0.0" },
     { name = "cachetools", specifier = ">=5.5.0" },
-    { name = "crawlee", specifier = ">=1.0.2,<2.0.0" },
+    { name = "crawlee", git = "https://github.com/apify/crawlee-python?rev=crawler-persistance" },
     { name = "cryptography", specifier = ">=42.0.0" },
     { name = "impit", specifier = ">=0.6.1" },
     { name = "lazy-object-proxy", specifier = ">=1.11.0" },
@@ -516,8 +516,8 @@ toml = [
 
 [[package]]
 name = "crawlee"
-version = "1.0.2"
-source = { registry = "https://pypi.org/simple" }
+version = "1.0.3"
+source = { git = "https://github.com/apify/crawlee-python?rev=crawler-persistance#b2b4724ce92f398f62dbb307257f31947b65e2a2" }
 dependencies = [
     { name = "cachetools" },
     { name = "colorama" },
@@ -532,10 +532,6 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "yarl" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/93/68/45641208866a60176be4c5f2ab620c2122df18db956dc86a03471181e7c3/crawlee-1.0.2.tar.gz", hash = "sha256:522b52c1362d116b95ba85820f87001713f290a3ec690568adb862a4b29d7ca4", size = 24900937, upload-time = "2025-10-08T07:59:09.983Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/be/ac/860de31ca534adb1d6321f66c7d082ba735eff49090f67a316f8d60f1ee2/crawlee-1.0.2-py3-none-any.whl", hash = "sha256:57a63d0b22493297490a5836e6b1d47dee667004d95bbc01387dcfb00f6a8a7a", size = 304369, upload-time = "2025-10-08T07:59:07.475Z" },
-]
 
 [package.optional-dependencies]
 parsel = [