diff --git a/CHANGELOG.md b/CHANGELOG.md index 1337e40b81..14079c3053 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## 0.1.1 (Unreleased) +### Features + +- Expose `crawler.log` to the public. + ### Bug fixes - Fix Pylance `reportPrivateImportUsage` errors by defining `__all__` in modules `__init__.py`. diff --git a/src/crawlee/basic_crawler/basic_crawler.py b/src/crawlee/basic_crawler/basic_crawler.py index 0362c13cfd..eebd86fb7a 100644 --- a/src/crawlee/basic_crawler/basic_crawler.py +++ b/src/crawlee/basic_crawler/basic_crawler.py @@ -221,6 +221,11 @@ def __init__( self._running = False self._has_finished_before = False + @property + def log(self) -> logging.Logger: + """The logger used by the crawler.""" + return self._logger + @property def router(self) -> Router[TCrawlingContext]: """The router used to handle each individual crawling request.""" diff --git a/src/crawlee/http_clients/httpx_client.py b/src/crawlee/http_clients/httpx_client.py index 76f61530bf..3fa2692630 100644 --- a/src/crawlee/http_clients/httpx_client.py +++ b/src/crawlee/http_clients/httpx_client.py @@ -24,10 +24,7 @@ class HttpTransport(httpx.AsyncHTTPTransport): """A modified HTTP transport adapter that stores response cookies in a `Session` instead of the httpx client.""" @override - async def handle_async_request( - self, - request: httpx.Request, - ) -> httpx.Response: + async def handle_async_request(self, request: httpx.Request) -> httpx.Response: response = await super().handle_async_request(request) response.request = request diff --git a/tests/unit/basic_crawler/test_basic_crawler.py b/tests/unit/basic_crawler/test_basic_crawler.py index 5795206bdf..28b357f726 100644 --- a/tests/unit/basic_crawler/test_basic_crawler.py +++ b/tests/unit/basic_crawler/test_basic_crawler.py @@ -3,6 +3,7 @@ import asyncio import json +import logging from dataclasses import dataclass from datetime import timedelta from typing import TYPE_CHECKING, Any @@ -577,3 +578,9 @@ async def
handler(context: BasicCrawlingContext) -> None: assert len(processed_urls) == 3 assert stats.requests_total == 3 assert stats.requests_finished == 3 + + +def test_crawler_log() -> None: + crawler = BasicCrawler() + assert isinstance(crawler.log, logging.Logger) + crawler.log.info('Test log message') diff --git a/website/src/pages/index.js b/website/src/pages/index.js index 489eda8869..f93d50c82b 100644 --- a/website/src/pages/index.js +++ b/website/src/pages/index.js @@ -139,7 +139,7 @@ async def main() -> None: # Or work with the data directly. data = await crawler.get_data() - crawler._logger.info(f'Extracted data: {data.items}') + crawler.log.info(f'Extracted data: {data.items}') if __name__ == '__main__':