Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions tests/unit/README.md
Original file line number Diff line number Diff line change
@@ -1 +1,11 @@
# Unit tests

Some tests may exhibit flaky behavior in CI. The reason for the flaky behavior should be understood, as it can indicate a bug in the code or a design flaw in the test. There are also other causes related to test execution, such as tests that are not (or cannot be) properly isolated, or the limited resources of the test executor.

Here are some suggested approaches to mitigate flakiness, sorted in the order of preference:
- Investigate the root cause and fix the code or test.
- Apply one of the pytest marks to mitigate the flakiness:
- `@run_alone_on_mac` - A test with this mark will run alone on the macOS executor in CI (normally several tests run in parallel, which can cause resource-sensitive tests to fail). Use it for resource-sensitive tests that are known to be flaky only on macOS.
- `@run_alone` - A test with this mark will run alone on any executor. Use it for resource-sensitive tests that are known to be flaky on all platforms, or for tests that cannot be run in parallel with other tests due to their design (this should be extremely rare).
- `@pytest.mark.flaky` - A test with this mark will be retried several times if it fails. Use it for tests that are known to be flaky, but where the reason for the flakiness is not understood or cannot be easily mitigated.
- `@pytest.mark.skip` - A test with this mark will be skipped. Use it when none of the above approaches mitigates the flakiness. Marking a test as skipped should be a last resort, as it can hide potential bugs and give a false sense of security. Skipped tests should be tracked in a GitHub issue.
19 changes: 17 additions & 2 deletions tests/unit/_autoscaling/test_snapshotter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import asyncio
from datetime import datetime, timedelta, timezone
from logging import getLogger
from typing import TYPE_CHECKING, cast
Expand All @@ -23,8 +24,22 @@

@pytest.fixture
async def event_manager() -> AsyncGenerator[LocalEventManager, None]:
# Use a long interval to avoid interference from periodic system info events during tests
async with LocalEventManager(system_info_interval=timedelta(hours=9999)) as event_manager:
# Use a long interval to avoid interference from periodic system info events during tests and ensure the first
# automatic event is consumed before yielding.

event_manager = LocalEventManager(system_info_interval=timedelta(hours=9999))

initial_system_info_consumed = asyncio.Event()

async def consume_automatic_system_info(_: EventSystemInfoData) -> None:
initial_system_info_consumed.set()

event_manager.on(event=Event.SYSTEM_INFO, listener=consume_automatic_system_info)

async with event_manager:
await initial_system_info_consumed.wait()
event_manager.off(event=Event.SYSTEM_INFO, listener=consume_automatic_system_info)

yield event_manager


Expand Down
9 changes: 7 additions & 2 deletions tests/unit/_statistics/test_request_max_duration.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@

async def test_request_max_duration_tracks_maximum() -> None:
"""Test that request_max_duration correctly tracks the maximum duration, not the minimum."""

# asyncio.sleep() can sleep slightly shorter than expected https://bugs.python.org/issue31539#msg302699
asyncio_sleep_time_tolerance = 0.015
sleep_time = 0.05

async with Statistics.with_default_state() as statistics:
# Record a short request
statistics.record_request_processing_start('request_1')
Expand All @@ -15,15 +20,15 @@ async def test_request_max_duration_tracks_maximum() -> None:

# Record a longer request
statistics.record_request_processing_start('request_2')
await asyncio.sleep(0.05) # 50ms delay
await asyncio.sleep(sleep_time) # 50ms delay
statistics.record_request_processing_finish('request_2')
second_duration = statistics.state.request_max_duration

# The max duration should be updated to the longer request's duration
assert second_duration is not None
assert first_duration is not None
assert second_duration >= first_duration
assert second_duration.total_seconds() >= 0.05
assert second_duration.total_seconds() >= (sleep_time - asyncio_sleep_time_tolerance)

# Record another short request - max should NOT decrease
statistics.record_request_processing_start('request_3')
Expand Down
2 changes: 2 additions & 0 deletions tests/unit/_utils/test_recurring_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pytest

from crawlee._utils.recurring_task import RecurringTask
from tests.unit.utils import run_alone_on_mac


@pytest.fixture
Expand Down Expand Up @@ -41,6 +42,7 @@ async def test_start_and_stop(function: AsyncMock, delay: timedelta) -> None:
assert rt.task.done()


@run_alone_on_mac
async def test_execution(function: AsyncMock, delay: timedelta) -> None:
task = RecurringTask(function, delay)

Expand Down
2 changes: 2 additions & 0 deletions tests/unit/browsers/test_browser_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest

from crawlee.browsers import BrowserPool, PlaywrightBrowserPlugin
from tests.unit.utils import run_alone_on_mac

if TYPE_CHECKING:
from yarl import URL
Expand Down Expand Up @@ -92,6 +93,7 @@ async def test_new_page_with_each_plugin(server_url: URL) -> None:
assert browser_pool.total_pages_count == 2


@run_alone_on_mac
async def test_with_default_plugin_constructor(server_url: URL) -> None:
async with BrowserPool.with_default_plugin(headless=True, browser_type='firefox') as browser_pool:
assert len(browser_pool.plugins) == 1
Expand Down
3 changes: 2 additions & 1 deletion tests/unit/crawlers/_basic/test_basic_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1373,6 +1373,7 @@ async def test_timeout_in_handler(sleep_type: str) -> None:
# Test is skipped in older Python versions.
from asyncio import timeout # type:ignore[attr-defined] # noqa: PLC0415

non_realtime_system_coefficient = 2
handler_timeout = timedelta(seconds=1)
max_request_retries = 3
double_handler_timeout_s = handler_timeout.total_seconds() * 2
Expand Down Expand Up @@ -1401,7 +1402,7 @@ async def handler(context: BasicCrawlingContext) -> None:

# Timeout in pytest, because previous implementation would run crawler until following:
# "The request queue seems to be stuck for 300.0s, resetting internal state."
async with timeout(max_request_retries * double_handler_timeout_s):
async with timeout(max_request_retries * double_handler_timeout_s * non_realtime_system_coefficient):
await crawler.run(['https://a.placeholder.com'])

assert crawler.statistics.state.requests_finished == 1
Expand Down
5 changes: 5 additions & 0 deletions tests/unit/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import sys

import pytest


def _leave_unmarked(test_function):
    """Return the decorated test unchanged (no-op decorator)."""
    return test_function


# Decorator for tests that should not share the executor with other tests on
# macOS: there it applies the `run_alone` pytest mark, on every other platform
# it leaves the test function untouched.
if sys.platform == 'darwin':
    run_alone_on_mac = pytest.mark.run_alone
else:
    run_alone_on_mac = _leave_unmarked
Loading