From 397bbfacdb75610de13c8c444eff8c2155e3c6d8 Mon Sep 17 00:00:00 2001 From: jlaportebot Date: Thu, 7 May 2026 12:36:57 -0400 Subject: [PATCH 1/5] Apply textwrap.dedent() to task docstrings When using the @task decorator, task documentation can be passed via the function docstring. However, the indentation from the function body was preserved in the UI, making the documentation look unattractive. This change applies textwrap.dedent() to the docstring before setting it as doc_md, which removes the common leading whitespace from each line. Fixes #66477 --- task-sdk/src/airflow/sdk/bases/decorator.py | 2 +- .../decorators/test_task_docstring.py | 139 ++++++++++++++++++ 2 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 task-sdk/tests/task_sdk/definitions/decorators/test_task_docstring.py diff --git a/task-sdk/src/airflow/sdk/bases/decorator.py b/task-sdk/src/airflow/sdk/bases/decorator.py index 8634fbe99647c..8ad9d1f6054db 100644 --- a/task-sdk/src/airflow/sdk/bases/decorator.py +++ b/task-sdk/src/airflow/sdk/bases/decorator.py @@ -527,7 +527,7 @@ def __call__(self, *args: FParams.args, **kwargs: FParams.kwargs) -> XComArg: op_doc_attrs = [op.doc, op.doc_json, op.doc_md, op.doc_rst, op.doc_yaml] # Set the task's doc_md to the function's docstring if it exists and no other doc* args are set. if self.function.__doc__ and not any(op_doc_attrs): - op.doc_md = self.function.__doc__ + op.doc_md = textwrap.dedent(self.function.__doc__) return XComArg(op) def _validate_arg_names(self, func: ValidationSource, kwargs: dict[str, Any]): diff --git a/task-sdk/tests/task_sdk/definitions/decorators/test_task_docstring.py b/task-sdk/tests/task_sdk/definitions/decorators/test_task_docstring.py new file mode 100644 index 0000000000000..bbdc3d9eeda91 --- /dev/null +++ b/task-sdk/tests/task_sdk/definitions/decorators/test_task_docstring.py @@ -0,0 +1,139 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import pendulum +import pytest + +from airflow.sdk import dag, task + + +def test_task_docstring_dedent_applied(): + """Test that task docstring is dedented when passed via function docstring.""" + + @dag(schedule=None, start_date=pendulum.datetime(2022, 1, 1)) + def pipeline(): + @task + def my_task(): + """ + This task does something important. + + In case of error you should do the following: + 1. Check the logs + 2. Verify the configuration + 3. Contact support + """ + + return my_task() + + dag_obj = pipeline() + task_obj = dag_obj.task_dict["my_task"] + + # Verify that the docstring is dedented (no leading whitespace on each line) + expected_doc = """This task does something important. + +In case of error you should do the following: +1. Check the logs +2. Verify the configuration +3. Contact support""" + assert task_obj.doc_md == expected_doc + + +def test_task_docstring_dedent_with_explicit_doc_md(): + """Test that explicit doc_md is not overridden by function docstring.""" + + @dag(schedule=None, start_date=pendulum.datetime(2022, 1, 1)) + def pipeline(): + @task(doc_md="Explicit documentation") + def my_task(): + """ + This is the function docstring. 
+ """ + + return my_task() + + dag_obj = pipeline() + task_obj = dag_obj.task_dict["my_task"] + + # Verify that explicit doc_md is used + assert task_obj.doc_md == "Explicit documentation" + + +def test_task_docstring_dedent_with_multiline_indentation(): + """Test that task docstring with complex indentation is properly dedented.""" + + @dag(schedule=None, start_date=pendulum.datetime(2022, 1, 1)) + def pipeline(): + @task + def my_task(): + """ + Task description. + + This is a more complex example with nested indentation: + + - First level + - Second level + - Third level + + And some code: + + ```python + def example(): + return "value" + ``` + """ + + return my_task() + + dag_obj = pipeline() + task_obj = dag_obj.task_dict["my_task"] + + # Verify that the docstring is dedented properly + expected_doc = """Task description. + +This is a more complex example with nested indentation: + +- First level + - Second level + - Third level + +And some code: + +```python +def example(): + return "value" +```""" + assert task_obj.doc_md == expected_doc + + +def test_task_no_docstring(): + """Test that task without docstring has no doc_md.""" + + @dag(schedule=None, start_date=pendulum.datetime(2022, 1, 1)) + def pipeline(): + @task + def my_task(): + pass + + return my_task() + + dag_obj = pipeline() + task_obj = dag_obj.task_dict["my_task"] + + # Verify that doc_md is None when there's no docstring + assert task_obj.doc_md is None From 5c7ea0491104dd7b1e18a84e4f9c0fe325eb1da3 Mon Sep 17 00:00:00 2001 From: jlaportebot Date: Sat, 16 May 2026 22:56:11 -0400 Subject: [PATCH 2/5] fix: strip whitespace from dedented docstring to remove leading/trailing newlines textwrap.dedent preserves leading/trailing newlines from docstrings. Adding .strip() ensures the doc_md is clean without leading/trailing whitespace. 
--- task-sdk/src/airflow/sdk/bases/decorator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/task-sdk/src/airflow/sdk/bases/decorator.py b/task-sdk/src/airflow/sdk/bases/decorator.py index 8ad9d1f6054db..3a623ebcf16cc 100644 --- a/task-sdk/src/airflow/sdk/bases/decorator.py +++ b/task-sdk/src/airflow/sdk/bases/decorator.py @@ -527,7 +527,7 @@ def __call__(self, *args: FParams.args, **kwargs: FParams.kwargs) -> XComArg: op_doc_attrs = [op.doc, op.doc_json, op.doc_md, op.doc_rst, op.doc_yaml] # Set the task's doc_md to the function's docstring if it exists and no other doc* args are set. if self.function.__doc__ and not any(op_doc_attrs): - op.doc_md = textwrap.dedent(self.function.__doc__) + op.doc_md = inspect.cleandoc(self.function.__doc__).strip() return XComArg(op) def _validate_arg_names(self, func: ValidationSource, kwargs: dict[str, Any]): From 7541598ee3e113c4811726c41a5cc1844d5196eb Mon Sep 17 00:00:00 2001 From: jlaportebot Date: Sun, 17 May 2026 18:57:54 -0400 Subject: [PATCH 3/5] test: add test for dedented task docstrings --- .../decorators/test_task_docstring.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/task-sdk/tests/task_sdk/definitions/decorators/test_task_docstring.py b/task-sdk/tests/task_sdk/definitions/decorators/test_task_docstring.py index bbdc3d9eeda91..ec2aa4d251aed 100644 --- a/task-sdk/tests/task_sdk/definitions/decorators/test_task_docstring.py +++ b/task-sdk/tests/task_sdk/definitions/decorators/test_task_docstring.py @@ -137,3 +137,21 @@ def my_task(): # Verify that doc_md is None when there's no docstring assert task_obj.doc_md is None + + +def test_task_docstring_dedent_simple(): + """Test that a simple indented docstring is dedented and stripped correctly.""" + + @dag(schedule=None, start_date=pendulum.datetime(2022, 1, 1)) + def pipeline(): + @task + def my_task(): + """ My task description. 
""" + + return my_task() + + dag_obj = pipeline() + task_obj = dag_obj.task_dict["my_task"] + + # Verify leading/trailing whitespace is stripped + assert task_obj.doc_md == "My task description." From a7d25d36d338a6467223a3c49a453e5f5f995133 Mon Sep 17 00:00:00 2001 From: jlaportebot Date: Sun, 17 May 2026 21:30:48 -0400 Subject: [PATCH 4/5] test: add test for already-dedented docstrings (no-op case) Addresses reviewer feedback: adds edge case test verifying that textwrap.dedent is effectively a no-op when docstrings have no common leading whitespace. --- .../decorators/test_task_docstring.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/task-sdk/tests/task_sdk/definitions/decorators/test_task_docstring.py b/task-sdk/tests/task_sdk/definitions/decorators/test_task_docstring.py index ec2aa4d251aed..53d890204c043 100644 --- a/task-sdk/tests/task_sdk/definitions/decorators/test_task_docstring.py +++ b/task-sdk/tests/task_sdk/definitions/decorators/test_task_docstring.py @@ -155,3 +155,31 @@ def my_task(): # Verify leading/trailing whitespace is stripped assert task_obj.doc_md == "My task description." + + +def test_task_docstring_already_dedented(): + """Test that already-dedented docstrings are handled as a no-op by textwrap.dedent. + + When a docstring has no common leading whitespace, textwrap.dedent should + return it unchanged and .strip() only removes surrounding whitespace. + """ + import textwrap + + # Verify textwrap.dedent behavior on non-indented strings + raw_doc = "This docstring has no leading indentation." + assert textwrap.dedent(raw_doc).strip() == "This docstring has no leading indentation." 
+ + # With a simple one-liner docstring (no common indent to strip) + @dag(schedule=None, start_date=pendulum.datetime(2022, 1, 1)) + def pipeline(): + @task + def my_task(): + """This docstring has no leading indentation.""" + + return my_task() + + dag_obj = pipeline() + task_obj = dag_obj.task_dict["my_task"] + + # The docstring should be unchanged after dedent + strip + assert task_obj.doc_md == "This docstring has no leading indentation." From 57ed797d5943c9b9342e8e6ea7edbe06a866f623 Mon Sep 17 00:00:00 2001 From: jlaportebot Date: Mon, 18 May 2026 09:14:59 -0400 Subject: [PATCH 5/5] fix: remove unused pytest import and fix docstring formatting for ruff compliance --- .../task_sdk/definitions/decorators/test_task_docstring.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/task-sdk/tests/task_sdk/definitions/decorators/test_task_docstring.py b/task-sdk/tests/task_sdk/definitions/decorators/test_task_docstring.py index 53d890204c043..287031166a0c8 100644 --- a/task-sdk/tests/task_sdk/definitions/decorators/test_task_docstring.py +++ b/task-sdk/tests/task_sdk/definitions/decorators/test_task_docstring.py @@ -18,7 +18,6 @@ from __future__ import annotations import pendulum -import pytest from airflow.sdk import dag, task @@ -146,7 +145,7 @@ def test_task_docstring_dedent_simple(): def pipeline(): @task def my_task(): - """ My task description. """ + """My task description.""" return my_task()