Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## 0.1.1 (Unreleased)

### Features

- Expose `crawler.log` as part of the public API.

### Bug fixes

- Fix Pylance `reportPrivateImportUsage` errors by defining `__all__` in modules `__init__.py`.
Expand Down
5 changes: 5 additions & 0 deletions src/crawlee/basic_crawler/basic_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,11 @@ def __init__(
self._running = False
self._has_finished_before = False

@property
def log(self) -> logging.Logger:
"""The logger used by the crawler."""
return self._logger

@property
def router(self) -> Router[TCrawlingContext]:
"""The router used to handle each individual crawling request."""
Expand Down
5 changes: 1 addition & 4 deletions src/crawlee/http_clients/httpx_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,7 @@ class HttpTransport(httpx.AsyncHTTPTransport):
"""A modified HTTP transport adapter that stores response cookies in a `Session` instead of the httpx client."""

@override
async def handle_async_request(
self,
request: httpx.Request,
) -> httpx.Response:
async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
response = await super().handle_async_request(request)
response.request = request

Expand Down
7 changes: 7 additions & 0 deletions tests/unit/basic_crawler/test_basic_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import asyncio
import json
import logging
from dataclasses import dataclass
from datetime import timedelta
from typing import TYPE_CHECKING, Any
Expand Down Expand Up @@ -577,3 +578,9 @@ async def handler(context: BasicCrawlingContext) -> None:
assert len(processed_urls) == 3
assert stats.requests_total == 3
assert stats.requests_finished == 3


def test_crawler_log() -> None:
    """The crawler exposes a usable standard-library logger via ``log``."""
    sut = BasicCrawler()
    logger = sut.log
    assert isinstance(logger, logging.Logger)
    logger.info('Test log message')
2 changes: 1 addition & 1 deletion website/src/pages/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ async def main() -> None:

# Or work with the data directly.
data = await crawler.get_data()
crawler._logger.info(f'Extracted data: {data.items}')
crawler.log.info(f'Extracted data: {data.items}')


if __name__ == '__main__':
Expand Down