diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index 96b4108ab7703..0de707561c774 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -99,7 +99,7 @@ jobs: }' --jq '.data.node.labels.nodes[]' | jq --slurp -c '[.[].name]' >> ${GITHUB_OUTPUT} if: github.event_name == 'pull_request_target' # Retrieve it to be able to determine which files has changed in the incoming commit of the PR - # we checkout the target commit and it's parent to be able to compare them + # we checkout the target commit and its parent to be able to compare them - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - uses: actions/checkout@v3 diff --git a/CONTRIBUTORS_QUICK_START.rst b/CONTRIBUTORS_QUICK_START.rst index 75168b4efccf8..7ccfac30d09fb 100644 --- a/CONTRIBUTORS_QUICK_START.rst +++ b/CONTRIBUTORS_QUICK_START.rst @@ -104,7 +104,7 @@ Colima ------ If you use Colima as your container runtimes engine, please follow the next steps: -1. `Install buildx manually `_ and follow it's instructions +1. `Install buildx manually `_ and follow its instructions 2. Link the Colima socket to the default socket path. Note that this may break other Docker servers. @@ -252,7 +252,7 @@ Typical development tasks ######################### For many of the development tasks you will need ``Breeze`` to be configured. ``Breeze`` is a development -environment which uses docker and docker-compose and it's main purpose is to provide a consistent +environment which uses docker and docker-compose and its main purpose is to provide a consistent and repeatable environment for all the contributors and CI. When using ``Breeze`` you avoid the "works for me" syndrome - because not only others can reproduce easily what you do, but also the CI of Airflow uses the same environment to run all tests - so you should be able to easily reproduce the same failures you diff --git a/Dockerfile b/Dockerfile index 5023e3ef9a3c0..43f085ce6afc2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1336,7 +1336,7 @@ RUN if [[ -f /docker-context-files/requirements.txt ]]; then \ ############################################################################################## # This is the actual Airflow image - much smaller than the build one. We copy -# installed Airflow and all it's dependencies from the build image to make it smaller. +# installed Airflow and all its dependencies from the build image to make it smaller. ############################################################################################## FROM ${PYTHON_BASE_IMAGE} as main diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index fbd2ba786ef24..66794268e2ac9 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -5439,7 +5439,7 @@ It has been removed. ``airflow.settings.CONTEXT_MANAGER_DAG`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -CONTEXT_MANAGER_DAG was removed from settings. It's role has been taken by ``DagContext`` in +CONTEXT_MANAGER_DAG was removed from settings. Its role has been taken by ``DagContext`` in 'airflow.models.dag'. 
One of the reasons was that settings should be rather static than store dynamic context from the DAG, but the main one is that moving the context out of settings allowed to untangle cyclic imports between DAG, BaseOperator, SerializedDAG, SerializedBaseOperator which was diff --git a/airflow/cli/cli_config.py b/airflow/cli/cli_config.py index 2ec55ff79498f..de1d43be6ffe5 100644 --- a/airflow/cli/cli_config.py +++ b/airflow/cli/cli_config.py @@ -593,7 +593,7 @@ def string_lower_type(val): ARG_DEPENDS_ON_PAST = Arg( ("-d", "--depends-on-past"), help="Determine how Airflow should deal with past dependencies. The default action is `check`, Airflow " - "will check if the the past dependencies are met for the tasks having `depends_on_past=True` before run " + "will check if the past dependencies are met for the tasks having `depends_on_past=True` before run " "them, if `ignore` is provided, the past dependencies will be ignored, if `wait` is provided and " "`depends_on_past=True`, Airflow will wait the past dependencies until they are met before running or " "skipping the task", diff --git a/airflow/cli/commands/dag_command.py b/airflow/cli/commands/dag_command.py index ea06619917f8f..4b54ac2f9f675 100644 --- a/airflow/cli/commands/dag_command.py +++ b/airflow/cli/commands/dag_command.py @@ -240,7 +240,7 @@ def dag_dependencies_show(args) -> None: @providers_configuration_loaded def dag_show(args) -> None: - """Display DAG or saves it's graphic representation to the file.""" + """Display DAG or saves its graphic representation to the file.""" dag = get_dag(args.subdir, args.dag_id) dot = render_dag(dag) filename = args.save diff --git a/airflow/example_dags/example_setup_teardown_taskflow.py b/airflow/example_dags/example_setup_teardown_taskflow.py index 4dcdbf253f6ca..b1bdbccf7fadc 100644 --- a/airflow/example_dags/example_setup_teardown_taskflow.py +++ b/airflow/example_dags/example_setup_teardown_taskflow.py @@ -50,7 +50,7 @@ def my_third_task(): # The method `as_teardown` will mark task_3 as teardown, task_1 as setup, and # arrow task_1 >> task_3. - # Now if you clear task_2, then it's setup task, task_1, will be cleared in + # Now if you clear task_2, then its setup task, task_1, will be cleared in # addition to its teardown task, task_3 # it's also possible to use a decorator to mark a task as setup or diff --git a/airflow/jobs/local_task_job_runner.py b/airflow/jobs/local_task_job_runner.py index c142504639328..d04af55dcedc3 100644 --- a/airflow/jobs/local_task_job_runner.py +++ b/airflow/jobs/local_task_job_runner.py @@ -43,7 +43,7 @@ an attempt by a program/library to write or read outside its allocated memory. In Python environment usually this signal refers to libraries which use low level C API. -Make sure that you use use right libraries/Docker Images +Make sure that you use right libraries/Docker Images for your architecture (Intel/ARM) and/or Operational System (Linux/macOS). Suggested way to debug diff --git a/airflow/jobs/scheduler_job_runner.py b/airflow/jobs/scheduler_job_runner.py index f128c7857351e..50f78a48a9680 100644 --- a/airflow/jobs/scheduler_job_runner.py +++ b/airflow/jobs/scheduler_job_runner.py @@ -1752,7 +1752,7 @@ def _cleanup_stale_dags(self, session: Session = NEW_SESSION) -> None: Find all dags that were not updated by Dag Processor recently and mark them as inactive. In case one of DagProcessors is stopped (in case there are multiple of them - for different dag folders), it's dags are never marked as inactive. 
+ for different dag folders), its dags are never marked as inactive. Also remove dags from SerializedDag table. Executed on schedule only if [scheduler]standalone_dag_processor is True. """ diff --git a/airflow/jobs/triggerer_job_runner.py b/airflow/jobs/triggerer_job_runner.py index 81ac8c178f776..298b6cf38843d 100644 --- a/airflow/jobs/triggerer_job_runner.py +++ b/airflow/jobs/triggerer_job_runner.py @@ -508,7 +508,7 @@ async def cancel_triggers(self): """ Drain the to_cancel queue and ensure all triggers that are not in the DB are cancelled. - This allows the the cleanup job to delete them. + This allows the cleanup job to delete them. """ while self.to_cancel: trigger_id = self.to_cancel.popleft() diff --git a/airflow/models/dagrun.py b/airflow/models/dagrun.py index 99e517656a129..6b3f561ffc8d3 100644 --- a/airflow/models/dagrun.py +++ b/airflow/models/dagrun.py @@ -1320,7 +1320,7 @@ def schedule_tis( """ Set the given task instances in to the scheduled state. - Each element of ``schedulable_tis`` should have it's ``task`` attribute already set. + Each element of ``schedulable_tis`` should have its ``task`` attribute already set. Any EmptyOperator without callbacks or outlets is instead set straight to the success state. diff --git a/airflow/providers/amazon/aws/hooks/quicksight.py b/airflow/providers/amazon/aws/hooks/quicksight.py index 02ea582631a69..586a73b97b750 100644 --- a/airflow/providers/amazon/aws/hooks/quicksight.py +++ b/airflow/providers/amazon/aws/hooks/quicksight.py @@ -152,7 +152,7 @@ def wait_for_state( :param target_state: Describes the QuickSight Job's Target State :param check_interval: the time interval in seconds which the operator will check the status of QuickSight Ingestion - :return: response of describe_ingestion call after Ingestion is is done + :return: response of describe_ingestion call after Ingestion is done """ while True: status = self.get_status(aws_account_id, data_set_id, ingestion_id) diff --git a/airflow/providers/amazon/aws/operators/eks.py b/airflow/providers/amazon/aws/operators/eks.py index 557bf29f5feed..98fbc66da43b4 100644 --- a/airflow/providers/amazon/aws/operators/eks.py +++ b/airflow/providers/amazon/aws/operators/eks.py @@ -987,7 +987,7 @@ class EksPodOperator(KubernetesPodOperator): empty, then the default boto3 configuration would be used (and must be maintained on each worker node). :param on_finish_action: What to do when the pod reaches its final state, or the execution is interrupted. - If "delete_pod", the pod will be deleted regardless it's state; if "delete_succeeded_pod", + If "delete_pod", the pod will be deleted regardless its state; if "delete_succeeded_pod", only succeeded pod will be deleted. You can set to "keep_pod" to keep the pod. Current default is `keep_pod`, but this will be changed in the next major release of this provider. :param is_delete_operator_pod: What to do when the pod reaches its final diff --git a/airflow/providers/amazon/aws/triggers/README.md b/airflow/providers/amazon/aws/triggers/README.md index cd0c0baae5d53..bd998a5a294ff 100644 --- a/airflow/providers/amazon/aws/triggers/README.md +++ b/airflow/providers/amazon/aws/triggers/README.md @@ -38,7 +38,7 @@ The first step to making an existing operator deferrable is to add `deferrable` The next step is to determine where the operator should be deferred. This will be dependent on what the operator does, and how it is written. Although every operator is different, there are a few guidelines to determine the best place to defer an operator. 1. 
If the operator has a `wait_for_completion` parameter, the `self.defer` method should be called right before the check for wait_for_completion . -2. If there is no `wait_for_completion` , look for the "main" task that the operator does. Often, operators will make various describe calls to to the boto3 API to verify certain conditions, or look up some information before performing its "main" task. Often, right after the "main" call to the boto3 API is made is a good place to call `self.defer`. +2. If there is no `wait_for_completion` , look for the "main" task that the operator does. Often, operators will make various describe calls to the boto3 API to verify certain conditions, or look up some information before performing its "main" task. Often, right after the "main" call to the boto3 API is made is a good place to call `self.defer`. Once the location to defer is decided in the operator, call the `self.defer` method if the `deferrable` flag is `True`. The `self.defer` method takes in several parameters, listed below: diff --git a/airflow/providers/apache/hdfs/CHANGELOG.rst b/airflow/providers/apache/hdfs/CHANGELOG.rst index 37de3a05cc49f..8a17a7f4a6924 100644 --- a/airflow/providers/apache/hdfs/CHANGELOG.rst +++ b/airflow/providers/apache/hdfs/CHANGELOG.rst @@ -75,7 +75,7 @@ you can use 3.* version of the provider, but the recommendation is to switch to Protobuf 3 required by the snakebite-py3 library has ended its life in June 2023 and Airflow and it's providers stopped supporting it. If you would like to continue using HDFS hooks and sensors based on snakebite-py3 library when you have protobuf library 4.+ you can install the 3.* version - of the provider but due to Protobuf incompatibility, you need to do one of the the two things: + of the provider but due to Protobuf incompatibility, you need to do one of the two things: * set ``PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python`` variable in your environment. * downgrade protobuf to latest 3.* version (3.20.3 at this time) diff --git a/airflow/providers/apache/kafka/triggers/await_message.py b/airflow/providers/apache/kafka/triggers/await_message.py index f2c1bb81ca6b6..8f89d2ba1f180 100644 --- a/airflow/providers/apache/kafka/triggers/await_message.py +++ b/airflow/providers/apache/kafka/triggers/await_message.py @@ -49,7 +49,7 @@ class AwaitMessageTrigger(BaseTrigger): defaults to None :param poll_timeout: How long the Kafka client should wait before returning from a poll request to Kafka (seconds), defaults to 1 - :param poll_interval: How long the the trigger should sleep after reaching the end of the Kafka log + :param poll_interval: How long the trigger should sleep after reaching the end of the Kafka log (seconds), defaults to 5 """ diff --git a/airflow/providers/cncf/kubernetes/operators/pod.py b/airflow/providers/cncf/kubernetes/operators/pod.py index eee7aab2ab30c..237975328eb00 100644 --- a/airflow/providers/cncf/kubernetes/operators/pod.py +++ b/airflow/providers/cncf/kubernetes/operators/pod.py @@ -232,7 +232,7 @@ class KubernetesPodOperator(BaseOperator): :param poll_interval: Polling period in seconds to check for the status. Used only in deferrable mode. :param log_pod_spec_on_failure: Log the pod's specification if a failure occurs :param on_finish_action: What to do when the pod reaches its final state, or the execution is interrupted. 
- If "delete_pod", the pod will be deleted regardless it's state; if "delete_succeeded_pod", + If "delete_pod", the pod will be deleted regardless its state; if "delete_succeeded_pod", only succeeded pod will be deleted. You can set to "keep_pod" to keep the pod. :param is_delete_operator_pod: What to do when the pod reaches its final state, or the execution is interrupted. If True (default), delete the diff --git a/airflow/providers/cncf/kubernetes/triggers/pod.py b/airflow/providers/cncf/kubernetes/triggers/pod.py index 7b7a9afe59455..f3f18a660ab97 100644 --- a/airflow/providers/cncf/kubernetes/triggers/pod.py +++ b/airflow/providers/cncf/kubernetes/triggers/pod.py @@ -61,7 +61,7 @@ class KubernetesPodTrigger(BaseTrigger): :param get_logs: get the stdout of the container as logs of the tasks. :param startup_timeout: timeout in seconds to start up the pod. :param on_finish_action: What to do when the pod reaches its final state, or the execution is interrupted. - If "delete_pod", the pod will be deleted regardless it's state; if "delete_succeeded_pod", + If "delete_pod", the pod will be deleted regardless its state; if "delete_succeeded_pod", only succeeded pod will be deleted. You can set to "keep_pod" to keep the pod. :param should_delete_pod: What to do when the pod reaches its final state, or the execution is interrupted. If True (default), delete the diff --git a/airflow/providers/common/sql/hooks/sql.pyi b/airflow/providers/common/sql/hooks/sql.pyi index 30d8eef488d9b..dedac037dfcb2 100644 --- a/airflow/providers/common/sql/hooks/sql.pyi +++ b/airflow/providers/common/sql/hooks/sql.pyi @@ -18,7 +18,7 @@ # This is automatically generated stub for the `common.sql` provider # # This file is generated automatically by the `update-common-sql-api stubs` pre-commit -# and the .pyi file represents part of the the "public" API that the +# and the .pyi file represents part of the "public" API that the # `common.sql` provider exposes to other providers. # # Any, potentially breaking change in the stubs will require deliberate manual action from the contributor diff --git a/airflow/providers/common/sql/operators/sql.pyi b/airflow/providers/common/sql/operators/sql.pyi index f2735b4a252ab..d9d099f948941 100644 --- a/airflow/providers/common/sql/operators/sql.pyi +++ b/airflow/providers/common/sql/operators/sql.pyi @@ -18,7 +18,7 @@ # This is automatically generated stub for the `common.sql` provider # # This file is generated automatically by the `update-common-sql-api stubs` pre-commit -# and the .pyi file represents part of the the "public" API that the +# and the .pyi file represents part of the "public" API that the # `common.sql` provider exposes to other providers. # # Any, potentially breaking change in the stubs will require deliberate manual action from the contributor diff --git a/airflow/providers/common/sql/sensors/sql.pyi b/airflow/providers/common/sql/sensors/sql.pyi index ed88c4b8109fe..0343d04871e50 100644 --- a/airflow/providers/common/sql/sensors/sql.pyi +++ b/airflow/providers/common/sql/sensors/sql.pyi @@ -18,7 +18,7 @@ # This is automatically generated stub for the `common.sql` provider # # This file is generated automatically by the `update-common-sql-api stubs` pre-commit -# and the .pyi file represents part of the the "public" API that the +# and the .pyi file represents part of the "public" API that the # `common.sql` provider exposes to other providers. 
# # Any, potentially breaking change in the stubs will require deliberate manual action from the contributor diff --git a/airflow/providers/google/cloud/operators/kubernetes_engine.py b/airflow/providers/google/cloud/operators/kubernetes_engine.py index 3f730d5bda343..642c4bf279555 100644 --- a/airflow/providers/google/cloud/operators/kubernetes_engine.py +++ b/airflow/providers/google/cloud/operators/kubernetes_engine.py @@ -429,7 +429,7 @@ class GKEStartPodOperator(KubernetesPodOperator): :param regional: The location param is region name. :param deferrable: Run operator in the deferrable mode. :param on_finish_action: What to do when the pod reaches its final state, or the execution is interrupted. - If "delete_pod", the pod will be deleted regardless it's state; if "delete_succeeded_pod", + If "delete_pod", the pod will be deleted regardless its state; if "delete_succeeded_pod", only succeeded pod will be deleted. You can set to "keep_pod" to keep the pod. Current default is `keep_pod`, but this will be changed in the next major release of this provider. :param is_delete_operator_pod: What to do when the pod reaches its final diff --git a/airflow/providers/google/cloud/operators/pubsub.py b/airflow/providers/google/cloud/operators/pubsub.py index 8e0f7a12d99a3..f91e9ea8c18dd 100644 --- a/airflow/providers/google/cloud/operators/pubsub.py +++ b/airflow/providers/google/cloud/operators/pubsub.py @@ -709,7 +709,7 @@ class PubSubPullOperator(GoogleCloudBaseOperator): :param gcp_conn_id: The connection ID to use connecting to Google Cloud. :param messages_callback: (Optional) Callback to process received messages. - It's return value will be saved to XCom. + Its return value will be saved to XCom. If you are pulling large messages, you probably want to provide a custom callback. If not provided, the default implementation will convert `ReceivedMessage` objects into JSON-serializable dicts using `google.protobuf.json_format.MessageToDict` function. diff --git a/airflow/providers/google/cloud/sensors/pubsub.py b/airflow/providers/google/cloud/sensors/pubsub.py index c818f7168e761..7bd07a08e5059 100644 --- a/airflow/providers/google/cloud/sensors/pubsub.py +++ b/airflow/providers/google/cloud/sensors/pubsub.py @@ -73,7 +73,7 @@ class PubSubPullSensor(BaseSensorOperator): :param gcp_conn_id: The connection ID to use connecting to Google Cloud. :param messages_callback: (Optional) Callback to process received messages. - It's return value will be saved to XCom. + Its return value will be saved to XCom. If you are pulling large messages, you probably want to provide a custom callback. If not provided, the default implementation will convert `ReceivedMessage` objects into JSON-serializable dicts using `google.protobuf.json_format.MessageToDict` function. 
diff --git a/airflow/providers/google/cloud/triggers/bigquery_dts.py b/airflow/providers/google/cloud/triggers/bigquery_dts.py index def8b90b66a30..3a5ab2267f97a 100644 --- a/airflow/providers/google/cloud/triggers/bigquery_dts.py +++ b/airflow/providers/google/cloud/triggers/bigquery_dts.py @@ -95,7 +95,7 @@ async def run(self) -> AsyncIterator[TriggerEvent]: self.log.info("Current state is %s", state) if state == TransferState.SUCCEEDED: - self.log.info("Job has completed it's work.") + self.log.info("Job has completed its work.") yield TriggerEvent( { "status": "success", diff --git a/airflow/providers/google/cloud/triggers/kubernetes_engine.py b/airflow/providers/google/cloud/triggers/kubernetes_engine.py index ba0df0fc153c2..1e0780fbc66c9 100644 --- a/airflow/providers/google/cloud/triggers/kubernetes_engine.py +++ b/airflow/providers/google/cloud/triggers/kubernetes_engine.py @@ -55,7 +55,7 @@ class GKEStartPodTrigger(KubernetesPodTrigger): will consult the class variable BASE_CONTAINER_NAME (which defaults to "base") for the base container name to use. :param on_finish_action: What to do when the pod reaches its final state, or the execution is interrupted. - If "delete_pod", the pod will be deleted regardless it's state; if "delete_succeeded_pod", + If "delete_pod", the pod will be deleted regardless its state; if "delete_succeeded_pod", only succeeded pod will be deleted. You can set to "keep_pod" to keep the pod. :param should_delete_pod: What to do when the pod reaches its final state, or the execution is interrupted. If True (default), delete the diff --git a/airflow/providers/google/cloud/triggers/pubsub.py b/airflow/providers/google/cloud/triggers/pubsub.py index 40c43f7cb79dd..27dbd8285cac6 100644 --- a/airflow/providers/google/cloud/triggers/pubsub.py +++ b/airflow/providers/google/cloud/triggers/pubsub.py @@ -41,7 +41,7 @@ class PubsubPullTrigger(BaseTrigger): immediately rather than by any downstream tasks :param gcp_conn_id: Reference to google cloud connection id :param messages_callback: (Optional) Callback to process received messages. - It's return value will be saved to XCom. + Its return value will be saved to XCom. If you are pulling large messages, you probably want to provide a custom callback. If not provided, the default implementation will convert `ReceivedMessage` objects into JSON-serializable dicts using `google.protobuf.json_format.MessageToDict` function. diff --git a/airflow/providers/hashicorp/hooks/vault.py b/airflow/providers/hashicorp/hooks/vault.py index 31867c938397f..0fe8e21031129 100644 --- a/airflow/providers/hashicorp/hooks/vault.py +++ b/airflow/providers/hashicorp/hooks/vault.py @@ -49,7 +49,7 @@ class VaultHook(BaseHook): The mount point should be placed as a path in the URL - similarly to Vault's URL schema: This indicates the "path" the secret engine is mounted on. Default id not specified is "secret". Note that this ``mount_point`` is not used for authentication if authentication is done via a - different engines. Each engine uses it's own engine-specific authentication mount_point. + different engines. Each engine uses its own engine-specific authentication mount_point. The extras in the connection are named the same as the parameters ('kv_engine_version', 'auth_type', ...). 
diff --git a/airflow/providers/microsoft/azure/CHANGELOG.rst b/airflow/providers/microsoft/azure/CHANGELOG.rst index c5dac31b9d86f..deec92ca60bc8 100644 --- a/airflow/providers/microsoft/azure/CHANGELOG.rst +++ b/airflow/providers/microsoft/azure/CHANGELOG.rst @@ -718,11 +718,11 @@ Breaking changes This change removes ``azure_container_instance_default`` connection type and replaces it with the ``azure_default``. The problem was that AzureContainerInstance was not needed as it was exactly the -same as the plain "azure" connection, however it's presence caused duplication in the field names +same as the plain "azure" connection, however its presence caused duplication in the field names used in the UI editor for connections and unnecessary warnings generated. This version uses plain Azure Hook and connection also for Azure Container Instance. If you already have ``azure_container_instance_default`` connection created in your DB, it will continue to work, but -the first time you edit it with the UI you will have to change it's type to ``azure_default``. +the first time you edit it with the UI you will have to change its type to ``azure_default``. Features ~~~~~~~~ diff --git a/airflow/providers/microsoft/azure/hooks/data_lake.py b/airflow/providers/microsoft/azure/hooks/data_lake.py index ef84ec51c2681..95ef4c6cc2789 100644 --- a/airflow/providers/microsoft/azure/hooks/data_lake.py +++ b/airflow/providers/microsoft/azure/hooks/data_lake.py @@ -241,7 +241,7 @@ class AzureDataLakeStorageV2Hook(BaseHook): accounts that have a hierarchical namespace. Using Adls_v2 connection details create DataLakeServiceClient object. - Due to Wasb is marked as legacy and and retirement of the (ADLS1), it would + Due to Wasb is marked as legacy and retirement of the (ADLS1), it would be nice to implement ADLS gen2 hook for interacting with the storage account. .. seealso:: diff --git a/airflow/providers/microsoft/azure/operators/data_factory.py b/airflow/providers/microsoft/azure/operators/data_factory.py index 1823212473f6b..d6b4592e35b6e 100644 --- a/airflow/providers/microsoft/azure/operators/data_factory.py +++ b/airflow/providers/microsoft/azure/operators/data_factory.py @@ -92,7 +92,7 @@ class AzureDataFactoryRunPipelineOperator(BaseOperator): ``AzureDataFactoryHook`` will attempt to use the resource group name provided in the corresponding connection. :param factory_name: The data factory name. If a value is not passed in to the operator, the - ``AzureDataFactoryHook`` will attempt to use the factory name name provided in the corresponding + ``AzureDataFactoryHook`` will attempt to use the factory name provided in the corresponding connection. :param reference_pipeline_run_id: The pipeline run identifier. If this run ID is specified the parameters of the specified run will be used to create a new run. 
diff --git a/airflow/ti_deps/deps/mapped_task_expanded.py b/airflow/ti_deps/deps/mapped_task_expanded.py index 87a804006be45..8138de9f9e476 100644 --- a/airflow/ti_deps/deps/mapped_task_expanded.py +++ b/airflow/ti_deps/deps/mapped_task_expanded.py @@ -21,7 +21,7 @@ class MappedTaskIsExpanded(BaseTIDep): - """Checks that a mapped task has been expanded before it's TaskInstance can run.""" + """Checks that a mapped task has been expanded before its TaskInstance can run.""" NAME = "Task has been mapped" IGNORABLE = False diff --git a/airflow/ti_deps/deps/not_previously_skipped_dep.py b/airflow/ti_deps/deps/not_previously_skipped_dep.py index 855f04af533d1..92dd2b373acdb 100644 --- a/airflow/ti_deps/deps/not_previously_skipped_dep.py +++ b/airflow/ti_deps/deps/not_previously_skipped_dep.py @@ -84,7 +84,7 @@ def _get_dep_statuses(self, ti, session, dep_context): ) if not past_depends_met: yield self._failing_status( - reason=("Task should be skipped but the the past depends are not met") + reason=("Task should be skipped but the past depends are not met") ) return ti.set_state(TaskInstanceState.SKIPPED, session) diff --git a/airflow/ti_deps/deps/trigger_rule_dep.py b/airflow/ti_deps/deps/trigger_rule_dep.py index c7e2982fffdd7..dbdf692e769ec 100644 --- a/airflow/ti_deps/deps/trigger_rule_dep.py +++ b/airflow/ti_deps/deps/trigger_rule_dep.py @@ -294,7 +294,7 @@ def _iter_upstream_conditions() -> Iterator[ColumnOperators]: ) if not past_depends_met: yield self._failing_status( - reason=("Task should be skipped but the the past depends are not met") + reason=("Task should be skipped but the past depends are not met") ) return changed = ti.set_state(new_state, session) diff --git a/airflow/utils/log/logging_mixin.py b/airflow/utils/log/logging_mixin.py index 59c1d7980c1e3..a3d3b4b00c71d 100644 --- a/airflow/utils/log/logging_mixin.py +++ b/airflow/utils/log/logging_mixin.py @@ -38,7 +38,7 @@ class SetContextPropagate(enum.Enum): :meta private: """ - # If a `set_context` function wants to _keep_ propagation set on it's logger it needs to return this + # If a `set_context` function wants to _keep_ propagation set on its logger it needs to return this # special value. MAINTAIN_PROPAGATE = object() # Don't use this one anymore! diff --git a/airflow/utils/process_utils.py b/airflow/utils/process_utils.py index f3104df918cdf..1f7c4771e8c02 100644 --- a/airflow/utils/process_utils.py +++ b/airflow/utils/process_utils.py @@ -94,7 +94,7 @@ def signal_procs(sig): + [str(p.pid) for p in all_processes_in_the_group] ) elif err_killpg.errno == errno.ESRCH: - # There is a rare condition that the process has not managed yet to change it's process + # There is a rare condition that the process has not managed yet to change its process # group. In this case os.killpg fails with ESRCH error # So we additionally send a kill signal to the process itself. logger.info( @@ -119,7 +119,7 @@ def signal_procs(sig): all_processes_in_the_group = parent.children(recursive=True) all_processes_in_the_group.append(parent) except psutil.NoSuchProcess: - # The process already exited, but maybe it's children haven't. + # The process already exited, but maybe its children haven't. 
all_processes_in_the_group = [] for proc in psutil.process_iter(): try: diff --git a/airflow/utils/task_group.py b/airflow/utils/task_group.py index 167eb53b71eaa..841abc710678a 100644 --- a/airflow/utils/task_group.py +++ b/airflow/utils/task_group.py @@ -482,7 +482,7 @@ def topological_sort(self, _include_subdag_tasks: bool = False): while graph_unsorted: # Go through each of the node/edges pairs in the unsorted graph. If a set of edges doesn't contain # any nodes that haven't been resolved, that is, that are still in the unsorted graph, remove the - # pair from the unsorted graph, and append it to the sorted graph. Note here that by using using + # pair from the unsorted graph, and append it to the sorted graph. Note here that by using # the values() method for iterating, a copy of the unsorted graph is used, allowing us to modify # the unsorted graph as we move through it. # diff --git a/airflow/www/fab_security/manager.py b/airflow/www/fab_security/manager.py index e9435b0303488..145e099d081b9 100644 --- a/airflow/www/fab_security/manager.py +++ b/airflow/www/fab_security/manager.py @@ -374,7 +374,7 @@ def oauth_user_info_getter(self, f): Decorator function to be the OAuth user info getter for all the providers. Receives provider and response return a dict with the information returned from the provider. - The returned user info dict should have it's keys with the same name as the User Model. + The returned user info dict should have its keys with the same name as the User Model. Use it like this an example for GitHub :: @@ -1096,7 +1096,7 @@ def security_cleanup(self, baseviews, menus): self.delete_resource(resource.name) def find_user(self, username=None, email=None): - """Generic function find a user by it's username or email.""" + """Generic function find a user by its username or email.""" raise NotImplementedError def get_role_permissions_from_db(self, role_id: int) -> list[Permission]: diff --git a/dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md b/dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md index d8f8d4c2c654c..8cd229066134d 100644 --- a/dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md +++ b/dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md @@ -136,7 +136,7 @@ candidates that you got from the `find-backtracking-candidates` command. This ** the next step is to narrow down the list of candidates to the one that is causing the backtracking. We narrow-down the list by "bisecting" the list. We remove half of the dependency limits and see if it -still works or not. It it works - we continue. If it does not work, we restore the removed half and remove +still works or not. If it works - we continue. If it does not work, we restore the removed half and remove the other half. Rinse and repeat until there is only one dependency left - hopefully (sometimes you will need to leave few of them). diff --git a/dev/README_RELEASE_PROVIDER_PACKAGES.md b/dev/README_RELEASE_PROVIDER_PACKAGES.md index 92e848c38094a..928cc29196625 100644 --- a/dev/README_RELEASE_PROVIDER_PACKAGES.md +++ b/dev/README_RELEASE_PROVIDER_PACKAGES.md @@ -97,7 +97,7 @@ Details about maintaining the SEMVER version are going to be discussed and imple First thing that release manager has to do is to change version of the provider to a target version. Each provider has a `provider.yaml` file that, among others, stores information about provider versions. When you attempt to release a provider you should update that -information based on the changes for the provider, and it's `CHANGELOG.rst`.
It might be that +information based on the changes for the provider, and its `CHANGELOG.rst`. It might be that `CHANGELOG.rst` already contains the right target version. This will be especially true if some changes in the provider add new features (then minor version is increased) or when the changes introduce backwards-incompatible, breaking change in the provider (then major version is diff --git a/dev/breeze/SELECTIVE_CHECKS.md b/dev/breeze/SELECTIVE_CHECKS.md index f2d2ab72b6725..d6e525485a97a 100644 --- a/dev/breeze/SELECTIVE_CHECKS.md +++ b/dev/breeze/SELECTIVE_CHECKS.md @@ -125,10 +125,10 @@ Github Actions to pass the list of parameters to a command to execute | all-python-versions | List of all python versions there are available in the form of JSON array | ['3.8', '3.9', '3.10'] | | | all-python-versions-list-as-string | List of all python versions there are available in the form of space separated string | 3.8 3.9 3.10 | * | | basic-checks-only | Whether to run all static checks ("false") or only basic set of static checks ("true") | false | | -| cache-directive | Which cache should be be used for images ("registry", "local" , "disabled") | registry | | +| cache-directive | Which cache should be used for images ("registry", "local" , "disabled") | registry | | | debug-resources | Whether resources usage should be printed during parallel job execution ("true"/ "false") | false | | -| default-branch | Which branch is default for the the build ("main" for main branch, "v2-4-test" for 2.4 line etc.) | main | | -| default-constraints-branch | Which branch is default for the the build ("constraints-main" for main branch, "constraints-2-4" etc.) | constraints-main | | +| default-branch | Which branch is default for the build ("main" for main branch, "v2-4-test" for 2.4 line etc.) | main | | +| default-constraints-branch | Which branch is default for the build ("constraints-main" for main branch, "constraints-2-4" etc.) | constraints-main | | | default-helm-version | Which Helm version to use as default | v3.9.4 | | | default-kind-version | Which Kind version to use as default | v0.16.0 | | | default-kubernetes-version | Which Kubernetes version to use as default | v1.25.2 | | diff --git a/dev/breeze/doc/adr/0003-bootstrapping-virtual-environment.md b/dev/breeze/doc/adr/0003-bootstrapping-virtual-environment.md index 5ade92a9c82b4..eb750460d60dc 100644 --- a/dev/breeze/doc/adr/0003-bootstrapping-virtual-environment.md +++ b/dev/breeze/doc/adr/0003-bootstrapping-virtual-environment.md @@ -87,7 +87,7 @@ Integration we run. It's been established practice of the CI is that the logic of the CI is stored in the same repository as the source code of the application it tests and part of the Breeze functions are shared with CI. -In the future when breeze stabilizes and it's update cadence will be +In the future when breeze stabilizes and its update cadence will be much slower (which is likele as it happened with the Breeze predecessor) there could be an option that Breeze is installed as separate package and same released Breeze version could be ued to manage multiple Airflow @@ -150,7 +150,7 @@ of Airflow) performs the following tasks: re-installation if new dependencies are used - which is not as seamlessly integrate in the regular development environment, and it might create some confusions for the - users who would have to learn `pipx` and it's commands. + users who would have to learn `pipx` and its commands. 
Another drawback of `pipx` is that installs one global version of breeze for all projects, where it is quite possible that someone has two different versions of @@ -192,7 +192,7 @@ The alternatives considered were: mostly "used" but not deliberately activated is a better choice - especially that most users will simply "use" breeze as an app rather than activate the environment deliberately. - Also choosing `pyenv` and it's virtualenv plugin would + Also choosing `pyenv` and its virtualenv plugin would add extra, unnecessary steps and prerequisites for Breeze. diff --git a/dev/breeze/src/airflow_breeze/commands/setup_commands.py b/dev/breeze/src/airflow_breeze/commands/setup_commands.py index 2668b1c319281..61e1f5ac7b57c 100644 --- a/dev/breeze/src/airflow_breeze/commands/setup_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/setup_commands.py @@ -297,7 +297,7 @@ def get_command_hash_export() -> str: f"[error]The `breeze {command} {subcommand}` is missing in rich-click options[/]" ) get_console().print( - "[info]Please add add it to rich_click.OPTION_GROUPS " + "[info]Please add it to rich_click.OPTION_GROUPS " "via one of the `*_commands_config.py` " "files in `dev/breeze/src/airflow_breeze/commands`[/]" ) diff --git a/dev/breeze/src/airflow_breeze/utils/cdxgen.py b/dev/breeze/src/airflow_breeze/utils/cdxgen.py index b03d7b8bb8587..b1a64efac8f97 100644 --- a/dev/breeze/src/airflow_breeze/utils/cdxgen.py +++ b/dev/breeze/src/airflow_breeze/utils/cdxgen.py @@ -201,7 +201,7 @@ def get_requirements_for_provider( ) get_console().print( f"[info]Provider {provider_id} has {len(provider_packages)} transitively " - f"dependent packages (excluding airflow and it's dependencies)" + f"dependent packages (excluding airflow and its dependencies)" ) get_console().print(provider_packages) provider_file = target_dir / provider_file_name diff --git a/dev/breeze/src/airflow_breeze/utils/md5_build_check.py b/dev/breeze/src/airflow_breeze/utils/md5_build_check.py index 54b46c99164c0..d1ec943785d42 100644 --- a/dev/breeze/src/airflow_breeze/utils/md5_build_check.py +++ b/dev/breeze/src/airflow_breeze/utils/md5_build_check.py @@ -32,7 +32,7 @@ def check_md5checksum_in_cache_modified(file_hash: str, cache_path: Path, update: bool) -> bool: """ - Check if the file hash is present in cache and it's content has been modified. Optionally updates + Check if the file hash is present in cache and its content has been modified. Optionally updates the hash. :param file_hash: hash of the current version of the file diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py index 211b92a0bc7f8..413fe8be71634 100644 --- a/dev/breeze/tests/test_selective_checks.py +++ b/dev/breeze/tests/test_selective_checks.py @@ -347,7 +347,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str): "upgrade-to-newer-dependencies": "false", "parallel-test-types-list-as-string": "Always Providers[airbyte,http]", }, - id="Providers tests tests run without amazon tests if no amazon file changed", + id="Providers tests run without amazon tests if no amazon file changed", ), pytest.param( ("airflow/providers/amazon/file.py",), diff --git a/dev/prepare_release_issue.py b/dev/prepare_release_issue.py index 4db602fce55b9..de50e910ebfb2 100755 --- a/dev/prepare_release_issue.py +++ b/dev/prepare_release_issue.py @@ -179,7 +179,7 @@ def render_template( keep_trailing_newline: bool = False, ) -> str: """ - Renders template based on it's name. 
Reads the template from _TEMPLATE.md.jinja2 in current dir. + Renders template based on its name. Reads the template from _TEMPLATE.md.jinja2 in current dir. :param template_name: name of the template to use :param context: Jinja2 context :param autoescape: Whether to autoescape HTML diff --git a/docs/apache-airflow-providers-amazon/connections/aws.rst b/docs/apache-airflow-providers-amazon/connections/aws.rst index b3b222913fcd8..ae41b104d3e85 100644 --- a/docs/apache-airflow-providers-amazon/connections/aws.rst +++ b/docs/apache-airflow-providers-amazon/connections/aws.rst @@ -746,7 +746,7 @@ Using IAM Roles for Service Accounts (IRSA) on EKS If you are running Airflow on `Amazon EKS `_, you can grant AWS related permission (such as S3 Read/Write for remote logging) to the Airflow service -by granting the IAM role to it's service account. +by granting the IAM role to its service account. IRSA provides fine-grained permission management for apps(e.g., pods) that run on EKS and use other AWS services. These could be apps that use S3, any other AWS services like Secrets Manager, CloudWatch, DynamoDB etc. @@ -779,7 +779,7 @@ Create IAM Role for Service Account(IRSA) using eksctl eksctl utils associate-iam-oidc-provider --cluster="" --approve -4. Replace ``EKS_CLUSTER_ID``, ``SERVICE_ACCOUNT_NAME`` and ``NAMESPACE`` and execute the the following command. +4. Replace ``EKS_CLUSTER_ID``, ``SERVICE_ACCOUNT_NAME`` and ``NAMESPACE`` and execute the following command. This command will use an existing EKS Cluster ID and create an IAM role, service account and namespace. .. code-block:: bash diff --git a/docs/apache-airflow-providers-apache-spark/connections/spark.rst b/docs/apache-airflow-providers-apache-spark/connections/spark.rst index 97a432c887af3..28249f3af9270 100644 --- a/docs/apache-airflow-providers-apache-spark/connections/spark.rst +++ b/docs/apache-airflow-providers-apache-spark/connections/spark.rst @@ -48,7 +48,7 @@ Extra (optional) When specifying the connection in environment variable you should specify it using URI syntax. -Note that all components of the URI should be URL-encoded. The URI and and the mongo +Note that all components of the URI should be URL-encoded. The URI and the mongo connection string are not the same. For example: diff --git a/docs/apache-airflow-providers-google/operators/cloud/bigquery_dts.rst b/docs/apache-airflow-providers-google/operators/cloud/bigquery_dts.rst index 46dbeaf14c273..0d3663419baad 100644 --- a/docs/apache-airflow-providers-google/operators/cloud/bigquery_dts.rst +++ b/docs/apache-airflow-providers-google/operators/cloud/bigquery_dts.rst @@ -44,7 +44,7 @@ In the case of Airflow, the customer needs to create a transfer config with the and then trigger a transfer run using a specialized Airflow operator that will call StartManualTransferRuns API for example :class:`~airflow.providers.google.cloud.operators.bigquery_dts.BigQueryDataTransferServiceStartTransferRunsOperator`. :class:`~airflow.providers.google.cloud.operators.bigquery_dts.BigQueryCreateDataTransferOperator` checks if automatic -scheduling option is present in passed configuration. If present then nothing is done, otherwise it's value is +scheduling option is present in passed configuration. If present then nothing is done, otherwise its value is set to ``True``. .. 
exampleinclude:: /../../tests/system/providers/google/cloud/bigquery/example_bigquery_dts.py diff --git a/docs/apache-airflow-providers-google/operators/cloud/kubernetes_engine.rst b/docs/apache-airflow-providers-google/operators/cloud/kubernetes_engine.rst index 373a196fd6672..8b51cdf06c8b1 100644 --- a/docs/apache-airflow-providers-google/operators/cloud/kubernetes_engine.rst +++ b/docs/apache-airflow-providers-google/operators/cloud/kubernetes_engine.rst @@ -135,7 +135,7 @@ Use of XCom ''''''''''' We can enable the usage of :ref:`XCom ` on the operator. This works by launching a sidecar container -with the pod specified. The sidecar is automatically mounted when the XCom usage is specified and it's mount point +with the pod specified. The sidecar is automatically mounted when the XCom usage is specified and its mount point is the path ``/airflow/xcom``. To provide values to the XCom, ensure your Pod writes it into a file called ``return.json`` in the sidecar. The contents of this can then be used downstream in your DAG. Here is an example of it being used: diff --git a/docs/apache-airflow-providers-microsoft-azure/operators/asb.rst b/docs/apache-airflow-providers-microsoft-azure/operators/asb.rst index 5ad6962418597..9adbbddc4c92e 100644 --- a/docs/apache-airflow-providers-microsoft-azure/operators/asb.rst +++ b/docs/apache-airflow-providers-microsoft-azure/operators/asb.rst @@ -70,7 +70,7 @@ Below is an example of using this operator to execute an Azure Service Bus Send Receive Message Azure Service Bus Queue ======================================== -To Receive Message or list of message or Batch message message in a Queue you can use +To receive a message, a list of messages, or a batch of messages in a Queue you can use :class:`~airflow.providers.microsoft.azure.operators.asb.AzureServiceBusReceiveMessageOperator`. Below is an example of using this operator to execute an Azure Service Bus Create Queue. diff --git a/docs/apache-airflow-providers-qubole/operators/qubole.rst b/docs/apache-airflow-providers-qubole/operators/qubole.rst index 162be4025ff83..31d6651a0cafd 100644 --- a/docs/apache-airflow-providers-qubole/operators/qubole.rst +++ b/docs/apache-airflow-providers-qubole/operators/qubole.rst @@ -71,7 +71,7 @@ To run jar file in your Hadoop cluster use Run Pig command """"""""""""""" -To run script script in *Pig Latin* in your Hadoop cluster use +To run a script in *Pig Latin* in your Hadoop cluster use .. exampleinclude:: /../../tests/system/providers/qubole/example_qubole.py :language: python @@ -82,7 +82,7 @@ To run script script in *Pig Latin* in your Hadoop cluster use Run Shell command """"""""""""""""" -To run Shell-script script use +To run a Shell script use .. exampleinclude:: /../../tests/system/providers/qubole/example_qubole.py :language: python diff --git a/docs/apache-airflow-providers/howto/create-custom-providers.rst b/docs/apache-airflow-providers/howto/create-custom-providers.rst index 6fe27674f2267..7b2a2fcf4c1dd 100644 --- a/docs/apache-airflow-providers/howto/create-custom-providers.rst +++ b/docs/apache-airflow-providers/howto/create-custom-providers.rst @@ -86,7 +86,7 @@ Exposing customized functionality to the Airflow's core: * ``notifications`` - this field should contain the notification classes. See :doc:`apache-airflow:howto/notifications` for description of the notifications. -* ``executors`` - this field should contain the executor class class names. +* ``executors`` - this field should contain the executor class names.
See :doc:`apache-airflow:core-concepts/executor/index` for description of the executors. * ``config`` - this field should contain dictionary that should conform to the diff --git a/docs/apache-airflow/administration-and-deployment/logging-monitoring/metrics.rst b/docs/apache-airflow/administration-and-deployment/logging-monitoring/metrics.rst index 224f4fdbeba3e..07f7dc651ad09 100644 --- a/docs/apache-airflow/administration-and-deployment/logging-monitoring/metrics.rst +++ b/docs/apache-airflow/administration-and-deployment/logging-monitoring/metrics.rst @@ -199,7 +199,7 @@ Gauges =================================================== ======================================================================== Name Description =================================================== ======================================================================== -``dagbag_size`` Number of DAGs found when the scheduler ran a scan based on it's +``dagbag_size`` Number of DAGs found when the scheduler ran a scan based on its configuration ``dag_processing.import_errors`` Number of errors from trying to parse DAG files ``dag_processing.total_parse_time`` Seconds taken to scan and import ``dag_processing.file_path_queue_size`` DAG files diff --git a/docs/apache-airflow/best-practices.rst b/docs/apache-airflow/best-practices.rst index 6cb2857d96755..d62a9d948fd76 100644 --- a/docs/apache-airflow/best-practices.rst +++ b/docs/apache-airflow/best-practices.rst @@ -331,7 +331,7 @@ Example of watcher pattern with trigger rules --------------------------------------------- The watcher pattern is how we call a DAG with a task that is "watching" the states of the other tasks. -It's primary purpose is to fail a DAG Run when any other task fail. +Its primary purpose is to fail a DAG Run when any other task fails. The need came from the Airflow system tests that are DAGs with different tasks (similarly like a test containing steps). Normally, when any task fails, all other tasks are not executed and the whole DAG Run gets failed status too. But diff --git a/docs/apache-airflow/howto/docker-compose/index.rst b/docs/apache-airflow/howto/docker-compose/index.rst index 1ab56b2347887..696c83c765c21 100644 --- a/docs/apache-airflow/howto/docker-compose/index.rst +++ b/docs/apache-airflow/howto/docker-compose/index.rst @@ -364,7 +364,7 @@ runtime user id which is unknown at the time of building the image. | ``AIRFLOW_IMAGE_NAME`` | Airflow Image to use. | apache/airflow:|version| | +--------------------------------+-----------------------------------------------------+--------------------------+ | ``AIRFLOW_UID`` | UID of the user to run Airflow containers as. | ``50000`` | -| | Override if you want to use use non-default Airflow | | +| | Override if you want to use non-default Airflow | | | | UID (for example when you map folders from host, | | | | it should be set to result of ``id -u`` call. | | | | When it is changed, a user with the UID is | | diff --git a/docs/apache-airflow/howto/email-config.rst b/docs/apache-airflow/howto/email-config.rst index 014a2e2464fb2..c00d959d3bf2d 100644 --- a/docs/apache-airflow/howto/email-config.rst +++ b/docs/apache-airflow/howto/email-config.rst @@ -53,7 +53,7 @@ Equivalent environment variables look like: To configure SMTP settings, checkout the :ref:`SMTP ` section in the standard configuration.
If you do not want to store the SMTP credentials in the config or in the environment variables, you can create a -connection called ``smtp_default`` of ``Email`` type, or choose a custom connection name and set the ``email_conn_id`` with it's name in +connection called ``smtp_default`` of ``Email`` type, or choose a custom connection name and set the ``email_conn_id`` with its name in the configuration & store SMTP username-password in it. Other SMTP settings like host, port etc always gets picked up from the configuration only. The connection can be of any type (for example 'HTTP connection'). diff --git a/docs/apache-airflow/howto/operator/datetime.rst b/docs/apache-airflow/howto/operator/datetime.rst index ac8644fe85972..53485f47cded8 100644 --- a/docs/apache-airflow/howto/operator/datetime.rst +++ b/docs/apache-airflow/howto/operator/datetime.rst @@ -35,7 +35,7 @@ Usage with current time The usages above might be useful in certain situations - for example when DAG is used to perform cleanups and maintenance and is not really supposed to be used for any DAGs that are supposed to be back-filled, -because the "current time" make back-filling non-idempotent, it's result depend on the time when the DAG +because the "current time" makes back-filling non-idempotent, its result depends on the time when the DAG actually was run. It's also slightly non-deterministic potentially even if it is run on schedule. It can take some time between when the DAGRun was scheduled and executed and it might mean that even if the DAGRun was scheduled properly, the actual time used for branching decision will be different than the diff --git a/docs/apache-airflow/public-airflow-interface.rst b/docs/apache-airflow/public-airflow-interface.rst index add533f4ed2ad..e63ac8fdca39d 100644 --- a/docs/apache-airflow/public-airflow-interface.rst +++ b/docs/apache-airflow/public-airflow-interface.rst @@ -46,7 +46,7 @@ Airflow Interface and might change at any time. You can also use Airflow's Public Interface via the `Stable REST API `_ (based on the OpenAPI specification). For specific needs you can also use the -`Airflow Command Line Interface (CLI) `_ though it's behaviour might change +`Airflow Command Line Interface (CLI) `_ though its behaviour might change in details (such as output format and available flags) so if you want to rely on those in programmatic way, the Stable REST API is recommended. diff --git a/docs/apache-airflow/tutorial/fundamentals.rst b/docs/apache-airflow/tutorial/fundamentals.rst index d7d3acf2e8ebc..2c710530b2d6b 100644 --- a/docs/apache-airflow/tutorial/fundamentals.rst +++ b/docs/apache-airflow/tutorial/fundamentals.rst @@ -400,4 +400,4 @@ Here are a few things you might want to do next: .. seealso:: - Continue to the next step of the tutorial: :doc:`/tutorial/taskflow` - - Skip to the the :doc:`/core-concepts/index` section for detailed explanation of Airflow concepts such as DAGs, Tasks, Operators, and more + - Skip to the :doc:`/core-concepts/index` section for detailed explanation of Airflow concepts such as DAGs, Tasks, Operators, and more diff --git a/docs/docker-stack/entrypoint.rst b/docs/docker-stack/entrypoint.rst index 38b9ea22732f9..77648a6ba1e9b 100644 --- a/docs/docker-stack/entrypoint.rst +++ b/docs/docker-stack/entrypoint.rst @@ -57,7 +57,7 @@ The user can be any UID. In case UID is different than the default ``airflow`` (UID=50000), the user will be automatically created when entering the container.
In order to accommodate a number of external libraries and projects, Airflow will automatically create -such an arbitrary user in (`/etc/passwd`) and make it's home directory point to ``/home/airflow``. +such an arbitrary user in (`/etc/passwd`) and make its home directory point to ``/home/airflow``. Many of 3rd-party libraries and packages require home directory of the user to be present, because they need to write some cache information there, so such a dynamic creation of a user is necessary. @@ -98,7 +98,7 @@ The entrypoint is waiting for a connection to the database independent of the da the stability of the environment. Waiting for connection involves executing ``airflow db check`` command, which means that a ``select 1 as is_alive;`` statement -is executed. Then it loops until the the command will be successful. +is executed. Then it loops until the command will be successful. It tries :envvar:`CONNECTION_CHECK_MAX_COUNT` times and sleeps :envvar:`CONNECTION_CHECK_SLEEP_TIME` between checks To disable check, set ``CONNECTION_CHECK_MAX_COUNT=0``. @@ -330,7 +330,7 @@ The entrypoint can also create webserver user automatically when you enter it. y production, it is only useful if you would like to run a quick test with the production image. You need to pass at least password to create such user via ``_AIRFLOW_WWW_USER_PASSWORD`` or :envvar:`_AIRFLOW_WWW_USER_PASSWORD_CMD` similarly like for other ``*_CMD`` variables, the content of -the ``*_CMD`` will be evaluated as shell command and it's output will be set as password. +the ``*_CMD`` will be evaluated as shell command and its output will be set as password. User creation will fail if none of the ``PASSWORD`` variables are set - there is no default for password for security reasons. diff --git a/scripts/ci/pre_commit/pre_commit_checkout_no_credentials.py b/scripts/ci/pre_commit/pre_commit_checkout_no_credentials.py index 6bdcc58040383..f0406d43749d6 100755 --- a/scripts/ci/pre_commit/pre_commit_checkout_no_credentials.py +++ b/scripts/ci/pre_commit/pre_commit_checkout_no_credentials.py @@ -74,7 +74,7 @@ def check_file(the_file: Path) -> int: if total_err_num: console.print( """ -[red]There are are some checkout instructions in github workflows that have no "persist_credentials" +[red]There are some checkout instructions in github workflows that have no "persist_credentials" set to False.[/] For security reasons - make sure all of the checkout actions have persist_credentials set, similar to: diff --git a/scripts/ci/pre_commit/pre_commit_update_common_sql_api_stubs.py b/scripts/ci/pre_commit/pre_commit_update_common_sql_api_stubs.py index 1a02ffad6a668..b8fead2861d5e 100755 --- a/scripts/ci/pre_commit/pre_commit_update_common_sql_api_stubs.py +++ b/scripts/ci/pre_commit/pre_commit_update_common_sql_api_stubs.py @@ -299,7 +299,7 @@ def compare_stub_files(generated_stub_path: Path, force_override: bool) -> tuple # This is automatically generated stub for the `common.sql` provider # # This file is generated automatically by the `update-common-sql-api stubs` pre-commit -# and the .pyi file represents part of the the "public" API that the +# and the .pyi file represents part of the "public" API that the # `common.sql` provider exposes to other providers. 
# # Any, potentially breaking change in the stubs will require deliberate manual action from the contributor diff --git a/setup.py b/setup.py index 9ded977624d14..2758cf0fda26b 100644 --- a/setup.py +++ b/setup.py @@ -868,7 +868,7 @@ def replace_extra_dependencies_with_provider_packages(extra: str, providers: lis So transitively 'salesforce' extra has all the dependencies it needs and in case the provider changes its dependencies, they will transitively change as well. - In the constraint mechanism we save both - provider versions and it's dependencies + In the constraint mechanism we save both - provider versions and its dependencies version, which means that installation using constraints is repeatable. For K8s and Celery which are both "Core executors" and "Providers" we have to diff --git a/tests/jobs/test_triggerer_job_logging.py b/tests/jobs/test_triggerer_job_logging.py index d11f4bb3686f7..52ff22c2f525c 100644 --- a/tests/jobs/test_triggerer_job_logging.py +++ b/tests/jobs/test_triggerer_job_logging.py @@ -171,7 +171,7 @@ def test_configure_trigger_log_handler_not_file_task_handler(cfg, cls, msg): """ No root handler configured. When non FileTaskHandler is configured, don't modify. - When when an incompatible subclass of FileTaskHandler is configured, don't modify. + When an incompatible subclass of FileTaskHandler is configured, don't modify. """ # reset handlers root_logger = logging.getLogger() diff --git a/tests/models/test_dag.py b/tests/models/test_dag.py index 87835c8adcc4f..342d08785197e 100644 --- a/tests/models/test_dag.py +++ b/tests/models/test_dag.py @@ -3822,7 +3822,7 @@ def test_clear_upstream_not_your_setup(self): the setup (and its teardown) will be cleared even though strictly speaking you don't "require" it since, depending on speed of execution, it might be torn down by t1 before / while w2 runs. It just gets cleared by virtue of it being upstream, and - that's what you requested. And it's teardown gets cleared too. But w1 doesn't. + that's what you requested. And its teardown gets cleared too. But w1 doesn't. 
""" with DAG(dag_id="test_dag", start_date=pendulum.now()) as dag: s1, w1, w2, t1 = self.make_tasks(dag, "s1, w1, w2, t1") diff --git a/tests/providers/common/sql/hooks/test_sql.py b/tests/providers/common/sql/hooks/test_sql.py index 72684e5e00b8e..d866ddce0ec4c 100644 --- a/tests/providers/common/sql/hooks/test_sql.py +++ b/tests/providers/common/sql/hooks/test_sql.py @@ -156,7 +156,7 @@ def get_cursor_descriptions(fields: list[str]) -> list[tuple[str]]: ([[1, 2], [11, 12]], [[3, 4], [13, 14]]), [[("id2",), ("value2",)]], [[3, 4], [13, 14]], - id="The return_last set set on multiple queries in list", + id="The return_last set on multiple queries in list", ), pytest.param( False, diff --git a/tests/providers/databricks/hooks/test_databricks_sql.py b/tests/providers/databricks/hooks/test_databricks_sql.py index ecc0385278557..ae2c07773cf5b 100644 --- a/tests/providers/databricks/hooks/test_databricks_sql.py +++ b/tests/providers/databricks/hooks/test_databricks_sql.py @@ -156,7 +156,7 @@ def get_cursor_descriptions(fields: list[str]) -> list[tuple[str]]: ([[1, 2], [11, 12]], [[3, 4], [13, 14]]), [[("id2",), ("value2",)]], [[3, 4], [13, 14]], - id="The return_last set set on multiple queries in list", + id="The return_last set on multiple queries in list", ), pytest.param( False, diff --git a/tests/providers/exasol/hooks/test_sql.py b/tests/providers/exasol/hooks/test_sql.py index 465edc61314b6..6716e91708a40 100644 --- a/tests/providers/exasol/hooks/test_sql.py +++ b/tests/providers/exasol/hooks/test_sql.py @@ -218,7 +218,7 @@ def get_columns(fields: list[str]) -> dict[str, dict[str, Any]]: ] ], [[3, 4], [13, 14]], - id="The return_last set set on multiple queries in list", + id="The return_last set on multiple queries in list", ), pytest.param( False, diff --git a/tests/providers/snowflake/hooks/test_sql.py b/tests/providers/snowflake/hooks/test_sql.py index 629cbadfdce85..b2143a1e1f0a9 100644 --- a/tests/providers/snowflake/hooks/test_sql.py +++ b/tests/providers/snowflake/hooks/test_sql.py @@ -151,7 +151,7 @@ def get_cursor_descriptions(fields: list[str]) -> list[tuple[str]]: [[("id2",), ("value2",)]], [[3, 4], [13, 14]], False, - id="The return_last set set on multiple queries in list", + id="The return_last set on multiple queries in list", ), pytest.param( False, diff --git a/tests/system/providers/amazon/CONTRIBUTING.md b/tests/system/providers/amazon/CONTRIBUTING.md index a06163ad58b35..b9aaaa66915c2 100644 --- a/tests/system/providers/amazon/CONTRIBUTING.md +++ b/tests/system/providers/amazon/CONTRIBUTING.md @@ -151,7 +151,7 @@ descriptive. For example, `example_redshift_cluster.py` and `example_redshift_s ## Environment ID -`ENV_ID` should be set via the the `SystemTestContextBuilder` and not manually. This +`ENV_ID` should be set via the `SystemTestContextBuilder` and not manually. This value should be used as part of any value which is test-specific such as the name of an S3 bucket being created. For example, `BUCKET_NAME = f'{ENV_ID}-test-bucket'`. diff --git a/tests/system/providers/google/cloud/datapipelines/resources/kinglear.txt b/tests/system/providers/google/cloud/datapipelines/resources/kinglear.txt index 91aef48fab7b9..a863ef33d227e 100644 --- a/tests/system/providers/google/cloud/datapipelines/resources/kinglear.txt +++ b/tests/system/providers/google/cloud/datapipelines/resources/kinglear.txt @@ -4044,7 +4044,7 @@ sound An unknown opposite. Thou art not vanquish'd, But cozen'd and beguil'd. Alb. Shut your mouth, dame, - Or with this paper shall I stop it. 
[Shows her her letter to + Or with this paper shall I stop it. [Shows her letter to Edmund.]- [To Edmund]. Hold, sir. [To Goneril] Thou worse than any name, read thine own evil. No tearing, lady! I perceive you know it. diff --git a/tests/system/providers/google/cloud/sql_to_sheets/example_sql_to_sheets.py b/tests/system/providers/google/cloud/sql_to_sheets/example_sql_to_sheets.py index 4ff49ec9aab74..e3d2dc711ee4e 100644 --- a/tests/system/providers/google/cloud/sql_to_sheets/example_sql_to_sheets.py +++ b/tests/system/providers/google/cloud/sql_to_sheets/example_sql_to_sheets.py @@ -56,7 +56,7 @@ # Sheet Finally, you need a Google Sheet you have access to, for testing you can -create a public sheet and get it's ID. +create a public sheet and get its ID. # Tear Down You can delete the db with diff --git a/tests/test_utils/mock_executor.py b/tests/test_utils/mock_executor.py index 96d42152c24ea..91e49e07febb7 100644 --- a/tests/test_utils/mock_executor.py +++ b/tests/test_utils/mock_executor.py @@ -95,7 +95,7 @@ def mock_task_fail(self, dag_id, task_id, run_id: str, try_number=1): FAILED. If the task identified by the tuple ``(dag_id, task_id, date, - try_number)`` is run by this executor it's state will be FAILED. + try_number)`` is run by this executor, its state will be FAILED. """ assert isinstance(run_id, str) self.mock_task_results[TaskInstanceKey(dag_id, task_id, run_id, try_number)] = State.FAILED diff --git a/tests/test_utils/system_tests_class.py b/tests/test_utils/system_tests_class.py index 785cc1c815a54..7b07857ebe771 100644 --- a/tests/test_utils/system_tests_class.py +++ b/tests/test_utils/system_tests_class.py @@ -131,7 +131,7 @@ def _print_all_log_files(): def run_dag(self, dag_id: str, dag_folder: str = DEFAULT_DAG_FOLDER) -> None: """ - Runs example dag by it's ID. + Runs an example dag by its ID. :param dag_id: id of a DAG to be run :param dag_folder: directory where to look for the specific DAG. Relative to AIRFLOW_HOME. diff --git a/tests/utils/test_db_cleanup.py b/tests/utils/test_db_cleanup.py index 80419cb9437a3..f9cbc25febd30 100644 --- a/tests/utils/test_db_cleanup.py +++ b/tests/utils/test_db_cleanup.py @@ -231,7 +231,7 @@ def test__cleanup_table(self, table_name, date_add_kwargs, expected_to_delete, e Verify that _cleanup_table actually deletes the rows it should. TaskInstance represents the "normal" case. DagRun is the odd case where we want - to keep the last non-externally-triggered DagRun record even if if it should be + to keep the last non-externally-triggered DagRun record even if it should be deleted according to the provided timestamp. We also verify that the "on delete cascade" behavior is as expected. Some tables diff --git a/tests/utils/test_sqlalchemy.py b/tests/utils/test_sqlalchemy.py index 2bf4ad1be5bbd..84eb3c8093d52 100644 --- a/tests/utils/test_sqlalchemy.py +++ b/tests/utils/test_sqlalchemy.py @@ -272,7 +272,7 @@ def test_bind_processor(self, input, expected): mock_dialect.dbapi = None process = config_type.bind_processor(mock_dialect) assert pickle.loads(process(input)) == expected - assert pickle.loads(process(input)) == expected, "should should not mutate variable" + assert pickle.loads(process(input)) == expected, "should not mutate variable" @pytest.mark.parametrize( "input",