From 4fb959e51d3c8aefbb787cccb0ed89f35d813b8f Mon Sep 17 00:00:00 2001
From: Josef Prochazka
Date: Tue, 11 Feb 2025 14:40:18 +0100
Subject: [PATCH 1/2] Set line length for docs-related code to 90

Update existing examples to be compliant.
---
 docs/01_overview/code/01_introduction.py      |  5 +-
 .../code/02_crawlee_beautifulsoup.py          |  3 +-
 docs/02_guides/code/02_crawlee_playwright.py  | 18 ++++-
 docs/02_guides/code/scrapy_src/__main__.py    | 74 ++++++++++---------
 docs/02_guides/code/scrapy_src/items.py       |  3 +-
 docs/02_guides/code/scrapy_src/main.py        | 21 +++---
 docs/02_guides/code/scrapy_src/settings.py    |  3 +-
 .../code/scrapy_src/spiders/title.py          |  3 +-
 docs/03_concepts/code/03_rq.py                | 11 ++-
 docs/03_concepts/code/05_proxy_actor_input.py |  4 +-
 docs/03_concepts/code/05_proxy_rotation.py    | 16 +++-
 docs/03_concepts/code/09_webserver.py         |  4 +-
 docs/pyproject.toml                           |  9 +++
 13 files changed, 113 insertions(+), 61 deletions(-)
 create mode 100644 docs/pyproject.toml

diff --git a/docs/01_overview/code/01_introduction.py b/docs/01_overview/code/01_introduction.py
index c5441d61..a3eaba25 100644
--- a/docs/01_overview/code/01_introduction.py
+++ b/docs/01_overview/code/01_introduction.py
@@ -10,5 +10,8 @@ async def main() -> None:
         async with httpx.AsyncClient() as client:
             response = await client.get(actor_input['url'])
             soup = BeautifulSoup(response.content, 'html.parser')
-            data = {'url': actor_input['url'], 'title': soup.title.string if soup.title else None}
+            data = {
+                'url': actor_input['url'],
+                'title': soup.title.string if soup.title else None,
+            }
             await Actor.push_data(data)
diff --git a/docs/02_guides/code/02_crawlee_beautifulsoup.py b/docs/02_guides/code/02_crawlee_beautifulsoup.py
index 489d83ae..e2dba8a1 100644
--- a/docs/02_guides/code/02_crawlee_beautifulsoup.py
+++ b/docs/02_guides/code/02_crawlee_beautifulsoup.py
@@ -25,7 +25,8 @@ async def main() -> None:
 
         # Create a crawler.
         crawler = BeautifulSoupCrawler(
-            # Limit the crawl to max requests. Remove or increase it for crawling all links.
+            # Limit the crawl to max requests.
+            # Remove or increase it for crawling all links.
             max_requests_per_crawl=50,
         )
 
diff --git a/docs/02_guides/code/02_crawlee_playwright.py b/docs/02_guides/code/02_crawlee_playwright.py
index 674c1e94..2f0f110f 100644
--- a/docs/02_guides/code/02_crawlee_playwright.py
+++ b/docs/02_guides/code/02_crawlee_playwright.py
@@ -25,7 +25,8 @@ async def main() -> None:
 
         # Create a crawler.
         crawler = PlaywrightCrawler(
-            # Limit the crawl to max requests. Remove or increase it for crawling all links.
+            # Limit the crawl to max requests.
+            # Remove or increase it for crawling all links.
             max_requests_per_crawl=50,
             headless=True,
             browser_launch_options={
@@ -43,9 +44,18 @@ async def request_handler(context: PlaywrightCrawlingContext) -> None:
             data = {
                 'url': context.request.url,
                 'title': await context.page.title(),
-                'h1s': [await h1.text_content() for h1 in await context.page.locator('h1').all()],
-                'h2s': [await h2.text_content() for h2 in await context.page.locator('h2').all()],
-                'h3s': [await h3.text_content() for h3 in await context.page.locator('h3').all()],
+                'h1s': [
+                    await h1.text_content()
+                    for h1 in await context.page.locator('h1').all()
+                ],
+                'h2s': [
+                    await h2.text_content()
+                    for h2 in await context.page.locator('h2').all()
+                ],
+                'h3s': [
+                    await h3.text_content()
+                    for h3 in await context.page.locator('h3').all()
+                ],
             }
 
             # Store the extracted data to the default dataset.
diff --git a/docs/02_guides/code/scrapy_src/__main__.py b/docs/02_guides/code/scrapy_src/__main__.py
index 56d477dd..c31adfb2 100644
--- a/docs/02_guides/code/scrapy_src/__main__.py
+++ b/docs/02_guides/code/scrapy_src/__main__.py
@@ -1,19 +1,20 @@
 """Apify Actor integration for Scrapy projects.
 
-This module transforms a Scrapy project into an Apify Actor, handling the configuration of logging, patching Scrapy's
-logging system, and establishing the required environment to run the Scrapy spider within the Apify platform.
+This module transforms a Scrapy project into an Apify Actor, handling the configuration
+of logging, patching Scrapy's logging system, and establishing the required environment
+to run the Scrapy spider within the Apify platform.
 
-This file is specifically designed to be executed when the project is run as an Apify Actor using `apify run` locally
-or being run on the Apify platform. It is not being executed when running the project as a Scrapy project using
-`scrapy crawl title_spider`.
+This file is specifically designed to be executed when the project is run as an Apify
+Actor, either locally using `apify run` or on the Apify platform. It is not executed
+when running the project as a Scrapy project using `scrapy crawl title_spider`.
 
 We recommend you do not modify this file unless you really know what you are doing.
 """
 
 # ruff: noqa: E402
 
-# We need to configure the logging first before we import anything else, so that nothing else imports
-# `scrapy.utils.log` before we patch it.
+# We need to configure the logging first before we import anything else, so that nothing
+# else imports `scrapy.utils.log` before we patch it.
 from __future__ import annotations
 
 from logging import StreamHandler, getLogger
@@ -29,9 +30,10 @@
 OTHER_LOGGER_NAMES = ['filelock', 'hpack', 'httpcore', 'httpx', 'protego', 'twisted']
 ALL_LOGGER_NAMES = MAIN_LOGGER_NAMES + OTHER_LOGGER_NAMES
 
-# To change the logging level, modify the `LOG_LEVEL` field in `settings.py`. If the field is not present in the file,
-# Scrapy will default to `DEBUG`. This setting applies to all loggers. If you wish to change the logging level for
-# a specific logger, do it in this file.
+# To change the logging level, modify the `LOG_LEVEL` field in `settings.py`. If the
+# field is not present in the file, Scrapy will default to `DEBUG`. This setting applies
+# to all loggers. If you wish to change the logging level for a specific logger,
+# do it in this file.
 settings = get_project_settings()
 LOGGING_LEVEL = settings['LOG_LEVEL']
 
@@ -40,7 +42,9 @@
 apify_handler.setFormatter(ActorLogFormatter(include_logger_name=True))
 
 
-def configure_logger(logger_name: str | None, log_level: str, *handlers: StreamHandler) -> None:
+def configure_logger(
+    logger_name: str | None, log_level: str, *handlers: StreamHandler
+) -> None:
     """Configure a logger with the specified settings.
 
     Args:
@@ -56,41 +60,46 @@ def configure_logger(logger_name: str | None, log_level: str, *handlers: StreamH
         logger.addHandler(handler)
 
 
-# Apify loggers have to be set up here and in the `new_configure_logging` as well to be able to use them both from
+# Apify loggers have to be set up here and in the `new_configure_logging` as well to be
+# able to use them both from
 # the `main.py` and Scrapy components.
 for logger_name in MAIN_LOGGER_NAMES:
     configure_logger(logger_name, LOGGING_LEVEL, apify_handler)
 
-# We can't attach our log handler to the loggers normally, because Scrapy would remove them in the `configure_logging`
-# call here: https://github.com/scrapy/scrapy/blob/2.11.0/scrapy/utils/log.py#L113 (even though
-# `disable_existing_loggers` is set to False :facepalm:). We need to monkeypatch Scrapy's `configure_logging` method
-# like this, so that our handler is attached right after Scrapy calls the `configure_logging` method, because
-# otherwise we would lose some log messages.
+# We can't attach our log handler to the loggers normally, because Scrapy would remove
+# them in the `configure_logging` call here:
+# https://github.com/scrapy/scrapy/blob/2.11.0/scrapy/utils/log.py#L113 (even though
+# `disable_existing_loggers` is set to False :facepalm:). We need to monkeypatch Scrapy's
+# `configure_logging` method like this, so that our handler is attached right after
+# Scrapy calls the `configure_logging` method, because otherwise we would lose some log
+# messages.
 old_configure_logging = scrapy_logging.configure_logging
 
 
 def new_configure_logging(*args: Any, **kwargs: Any) -> None:
-    """Configure logging for Scrapy and root loggers to ensure consistent logging behavior.
+    """Configure logging for Scrapy and root loggers to ensure consistent log behavior.
 
-    We need to manually configure both the root logger and all Scrapy-associated loggers. Configuring only the root
-    logger is not sufficient, as Scrapy will override it with its own settings. Scrapy uses these four primary
-    loggers - https://github.com/scrapy/scrapy/blob/2.11.0/scrapy/utils/log.py#L60:L77. Therefore, we configure here
+    We need to manually configure both the root logger and all Scrapy-associated loggers.
+    Configuring only the root logger is not sufficient, as Scrapy will override it
+    with its own settings. Scrapy uses these four primary loggers, listed at
+    https://github.com/scrapy/scrapy/blob/2.11.0/scrapy/utils/log.py#L60:L77. We configure
     these four loggers and the root logger.
     """
     old_configure_logging(*args, **kwargs)
 
-    # We modify the root (None) logger to ensure proper display of logs from spiders when using the `self.logger`
-    # property within spiders. See details in the Spider logger property:
+    # We modify the root (None) logger to ensure proper display of logs from spiders when
+    # using the `self.logger` property within spiders. See details in the Spider logger
+    # property:
     # https://github.com/scrapy/scrapy/blob/2.11.0/scrapy/spiders/__init__.py#L43:L46.
     configure_logger(None, LOGGING_LEVEL, apify_handler)
 
-    # We modify other loggers only by setting up their log level. A custom log handler is added
-    # only to the root logger to avoid duplicate log messages.
+    # We modify other loggers only by setting up their log level. A custom log handler
+    # is added only to the root logger to avoid duplicate log messages.
     for logger_name in ALL_LOGGER_NAMES:
         configure_logger(logger_name, LOGGING_LEVEL)
 
-    # Set the HTTPX logger explicitly to the WARNING level, because it is too verbose and spams the logs with useless
-    # messages, especially when running on the platform.
+    # Set the HTTPX logger explicitly to the WARNING level, because it is too verbose
+    # and spams the logs with useless messages, especially when running on the platform.
     configure_logger('httpx', 'WARNING')
 
 
@@ -105,12 +114,11 @@ def new_configure_logging(*args: Any, **kwargs: Any) -> None:
 from .main import main
 
 
-# For compatibility between Twisted (used by Scrapy) and AsyncIO (used by Apify) asynchronous libraries, it is
-# necessary to set the Twisted reactor to `AsyncioSelectorReactor`. This setup allows the two asynchronous libraries
-# to work together.
-#
-# Note: The reactor must be installed before applying `nest_asyncio.apply()`, otherwise, it will not work correctly
-# on Windows.
+# For compatibility between Twisted (used by Scrapy) and AsyncIO (used by Apify)
+# asynchronous libraries, it is necessary to set the Twisted reactor to
+# `AsyncioSelectorReactor`. This setup allows the two asynchronous libraries
+# to work together. Note: The reactor must be installed before applying
+# `nest_asyncio.apply()`, otherwise, it will not work correctly on Windows.
 install_reactor('twisted.internet.asyncioreactor.AsyncioSelectorReactor')
 nest_asyncio.apply()
 
diff --git a/docs/02_guides/code/scrapy_src/items.py b/docs/02_guides/code/scrapy_src/items.py
index eae7ff23..f6d8b28d 100644
--- a/docs/02_guides/code/scrapy_src/items.py
+++ b/docs/02_guides/code/scrapy_src/items.py
@@ -3,7 +3,8 @@
 This module defines Scrapy item models for scraped data. Items represent structured data
 extracted by spiders.
 
-For detailed information on creating and utilizing items, refer to the official documentation:
+For detailed information on creating and utilizing items,
+refer to the official documentation:
 https://docs.scrapy.org/en/latest/topics/items.html
 """
 
diff --git a/docs/02_guides/code/scrapy_src/main.py b/docs/02_guides/code/scrapy_src/main.py
index 1a878c5b..4bf9441b 100644
--- a/docs/02_guides/code/scrapy_src/main.py
+++ b/docs/02_guides/code/scrapy_src/main.py
@@ -1,22 +1,25 @@
 """This module defines the main entry point for the Apify Actor.
 
-This module defines the main coroutine for the Apify Scrapy Actor, executed from the __main__.py file. The coroutine
-processes the Actor's input and executes the Scrapy spider. Additionally, it updates Scrapy project settings by
-applying Apify-related settings. Which includes adding a custom scheduler, retry middleware, and an item pipeline
-for pushing data to the Apify dataset.
+This module defines the main coroutine for the Apify Scrapy Actor, executed from
+the __main__.py file. The coroutine processes the Actor's input and executes the Scrapy
+spider. Additionally, it updates Scrapy project settings by applying Apify-related
+settings, which add a custom scheduler, retry middleware, and an item
+pipeline for pushing data to the Apify dataset.
 
 Customization:
 --------------
 
-Feel free to customize this file to add specific functionality to the Actor, such as incorporating your own Scrapy
-components like spiders and handling Actor input. However, make sure you have a clear understanding of your
-modifications. For instance, removing `apply_apify_settings` break the integration between Scrapy and Apify.
+Feel free to customize this file to add specific functionality to the Actor, such
+as incorporating your own Scrapy components like spiders and handling Actor input.
+However, make sure you have a clear understanding of your modifications. For instance,
+removing `apply_apify_settings` breaks the integration between Scrapy and Apify.
 
 Documentation:
 --------------
 
-For an in-depth description of the Apify-Scrapy integration process, our Scrapy components, known limitations and
-other stuff, please refer to the following documentation page: https://docs.apify.com/cli/docs/integrating-scrapy.
+For an in-depth description of the Apify-Scrapy integration process, our Scrapy
+components, known limitations and other stuff, please refer to the following
+documentation page: https://docs.apify.com/cli/docs/integrating-scrapy.
 """
 
 from __future__ import annotations
diff --git a/docs/02_guides/code/scrapy_src/settings.py b/docs/02_guides/code/scrapy_src/settings.py
index 8a0fd3e6..c9d11107 100644
--- a/docs/02_guides/code/scrapy_src/settings.py
+++ b/docs/02_guides/code/scrapy_src/settings.py
@@ -1,6 +1,7 @@
 """Scrapy settings module.
 
-This module contains Scrapy settings for the project, defining various configurations and options.
+This module contains Scrapy settings for the project, defining various configurations
+and options.
 
 For more comprehensive details on Scrapy settings, refer to the official documentation:
 http://doc.scrapy.org/en/latest/topics/settings.html
diff --git a/docs/02_guides/code/scrapy_src/spiders/title.py b/docs/02_guides/code/scrapy_src/spiders/title.py
index 7be37b68..1c299abe 100644
--- a/docs/02_guides/code/scrapy_src/spiders/title.py
+++ b/docs/02_guides/code/scrapy_src/spiders/title.py
@@ -20,7 +20,8 @@ class TitleSpider(Spider):
 
     name = 'title_spider'
 
-    # The `start_urls` specified in this class will be merged with the `start_urls` value from your Actor input
+    # The `start_urls` specified in this class will be merged with the
+    # `start_urls` value from your Actor input
     # when the project is executed using Apify.
     start_urls = ['https://apify.com/']
 
diff --git a/docs/03_concepts/code/03_rq.py b/docs/03_concepts/code/03_rq.py
index ba6a9570..fe1ea605 100644
--- a/docs/03_concepts/code/03_rq.py
+++ b/docs/03_concepts/code/03_rq.py
@@ -19,7 +19,9 @@ async def main() -> None:
         await queue.add_request(Request.from_url('http://example.com/0'), forefront=True)
 
         # If you try to add an existing request again, it will not do anything
-        add_request_info = await queue.add_request(Request.from_url('http://different-example.com/5'))
+        add_request_info = await queue.add_request(
+            Request.from_url('http://different-example.com/5')
+        )
         Actor.log.info(f'Add request info: {add_request_info}')
 
         processed_request = await queue.get_request(add_request_info.id)
@@ -29,8 +31,8 @@ async def main() -> None:
         while not await queue.is_finished():
             # Fetch the next unhandled request in the queue
             request = await queue.fetch_next_request()
-            # This can happen due to the eventual consistency of the underlying request queue storage,
-            # best solution is just to sleep a bit
+            # This can happen due to the eventual consistency of the underlying request
+            # queue storage, best solution is just to sleep a bit.
             if request is None:
                 await asyncio.sleep(1)
                 continue
@@ -45,6 +47,7 @@ async def main() -> None:
                 Actor.log.info('Request successful.')
                 await queue.mark_request_as_handled(request)
             else:
-                # If processing the request was unsuccessful, reclaim it so it can be processed again
+                # If processing the request was unsuccessful, reclaim it so it can be
+                # processed again.
                 Actor.log.warning('Request failed, will retry!')
                 await queue.reclaim_request(request)
diff --git a/docs/03_concepts/code/05_proxy_actor_input.py b/docs/03_concepts/code/05_proxy_actor_input.py
index 3a69ea0a..3ca0344d 100644
--- a/docs/03_concepts/code/05_proxy_actor_input.py
+++ b/docs/03_concepts/code/05_proxy_actor_input.py
@@ -5,7 +5,9 @@ async def main() -> None:
     async with Actor:
         actor_input = await Actor.get_input() or {}
         proxy_settings = actor_input.get('proxySettings')
-        proxy_configuration = await Actor.create_proxy_configuration(actor_proxy_input=proxy_settings)
+        proxy_configuration = await Actor.create_proxy_configuration(
+            actor_proxy_input=proxy_settings
+        )
 
         if not proxy_configuration:
             raise RuntimeError('No proxy configuration available.')
diff --git a/docs/03_concepts/code/05_proxy_rotation.py b/docs/03_concepts/code/05_proxy_rotation.py
index c816dabf..8e6a5de0 100644
--- a/docs/03_concepts/code/05_proxy_rotation.py
+++ b/docs/03_concepts/code/05_proxy_rotation.py
@@ -17,7 +17,15 @@ async def main() -> None:
         proxy_url = await proxy_configuration.new_url()  # http://proxy-2.com
         proxy_url = await proxy_configuration.new_url()  # http://proxy-1.com
         proxy_url = await proxy_configuration.new_url()  # http://proxy-2.com
-        proxy_url = await proxy_configuration.new_url(session_id='a')  # http://proxy-1.com
-        proxy_url = await proxy_configuration.new_url(session_id='b')  # http://proxy-2.com
-        proxy_url = await proxy_configuration.new_url(session_id='b')  # http://proxy-2.com
-        proxy_url = await proxy_configuration.new_url(session_id='a')  # http://proxy-1.com
+        proxy_url = await proxy_configuration.new_url(
+            session_id='a'
+        )  # http://proxy-1.com
+        proxy_url = await proxy_configuration.new_url(
+            session_id='b'
+        )  # http://proxy-2.com
+        proxy_url = await proxy_configuration.new_url(
+            session_id='b'
+        )  # http://proxy-2.com
+        proxy_url = await proxy_configuration.new_url(
+            session_id='a'
+        )  # http://proxy-1.com
diff --git a/docs/03_concepts/code/09_webserver.py b/docs/03_concepts/code/09_webserver.py
index de6d953d..48a5c10d 100644
--- a/docs/03_concepts/code/09_webserver.py
+++ b/docs/03_concepts/code/09_webserver.py
@@ -21,7 +21,9 @@ def run_server() -> None:
     # Start the HTTP server on the provided port,
     # and save a reference to the server.
     global http_server
-    with ThreadingHTTPServer(('', Actor.config.web_server_port), RequestHandler) as server:
+    with ThreadingHTTPServer(
+        ('', Actor.config.web_server_port), RequestHandler
+    ) as server:
         Actor.log.info(f'Server running on {Actor.config.web_server_port}')
         http_server = server
         server.serve_forever()
diff --git a/docs/pyproject.toml b/docs/pyproject.toml
new file mode 100644
index 00000000..73a75678
--- /dev/null
+++ b/docs/pyproject.toml
@@ -0,0 +1,9 @@
+# Line length differs from the rest of the code to make sure that the example code visualised on the generated
+# documentation webpages is shown without a horizontal scrollbar, to make it more readable.
+
+[tool.ruff]
+# Inherit everything from the top-level project configuration file.
+extend = "../pyproject.toml"
+
+# Override just the line length.
+line-length = 90 # Maximum width that still fits the docs webpage. Longer lines would need a scrollbar.
From ce543e3a326be3a0808b1e205a0a86e6856c9274 Mon Sep 17 00:00:00 2001
From: Josef Prochazka
Date: Tue, 18 Feb 2025 08:49:59 +0100
Subject: [PATCH 2/2] Remove scrapy_src leftovers after merge from master

---
 docs/02_guides/code/scrapy_src/__main__.py | 0
 docs/02_guides/code/scrapy_src/items.py    | 0
 docs/02_guides/code/scrapy_src/main.py     | 0
 docs/02_guides/code/scrapy_src/settings.py | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 docs/02_guides/code/scrapy_src/__main__.py
 delete mode 100644 docs/02_guides/code/scrapy_src/items.py
 delete mode 100644 docs/02_guides/code/scrapy_src/main.py
 delete mode 100644 docs/02_guides/code/scrapy_src/settings.py

diff --git a/docs/02_guides/code/scrapy_src/__main__.py b/docs/02_guides/code/scrapy_src/__main__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/docs/02_guides/code/scrapy_src/items.py b/docs/02_guides/code/scrapy_src/items.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/docs/02_guides/code/scrapy_src/main.py b/docs/02_guides/code/scrapy_src/main.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/docs/02_guides/code/scrapy_src/settings.py b/docs/02_guides/code/scrapy_src/settings.py
deleted file mode 100644
index e69de29b..00000000