-
Notifications
You must be signed in to change notification settings - Fork 16.8k
Feature/add max new dagruns to schedule #64294
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
2bce943
03bacfb
0399261
c70108c
c5431b9
36e0514
25ced1c
bdf59c7
920f06f
0f6948c
eec2612
9de6190
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -39,6 +39,7 @@ | |
| Index, | ||
| Integer, | ||
| PrimaryKeyConstraint, | ||
| SQLColumnExpression, | ||
| String, | ||
| Text, | ||
| UniqueConstraint, | ||
|
|
@@ -56,7 +57,15 @@ | |
| from sqlalchemy.ext.associationproxy import association_proxy | ||
| from sqlalchemy.ext.hybrid import hybrid_property | ||
| from sqlalchemy.ext.mutable import MutableDict | ||
| from sqlalchemy.orm import Mapped, declared_attr, joinedload, mapped_column, relationship, synonym, validates | ||
| from sqlalchemy.orm import ( | ||
| Mapped, | ||
| declared_attr, | ||
| joinedload, | ||
| mapped_column, | ||
| relationship, | ||
| synonym, | ||
| validates, | ||
| ) | ||
| from sqlalchemy.sql.expression import false, select | ||
| from sqlalchemy.sql.functions import coalesce | ||
|
|
||
|
|
@@ -313,6 +322,11 @@ class DagRun(Base, LoggingMixin): | |
| "max_dagruns_per_loop_to_schedule", | ||
| fallback=20, | ||
| ) | ||
| DEFAULT_NEW_DAGRUNS_TO_EXAMINE = airflow_conf.getint( | ||
| "scheduler", | ||
| "max_new_dagruns_per_loop_to_schedule", | ||
| fallback=0, | ||
| ) | ||
| _ti_dag_versions = association_proxy("task_instances", "dag_version") | ||
| _tih_dag_versions = association_proxy("task_instances_histories", "dag_version") | ||
|
|
||
|
|
@@ -615,7 +629,7 @@ def active_runs_of_dags( | |
|
|
||
| @classmethod | ||
| @retry_db_transaction | ||
| def get_running_dag_runs_to_examine(cls, session: Session) -> ScalarResult[DagRun]: | ||
| def get_running_dag_runs_to_examine(cls, session: Session) -> Sequence[DagRun]: | ||
| """ | ||
| Return the next DagRuns that the scheduler should attempt to schedule. | ||
|
|
||
|
|
@@ -628,27 +642,65 @@ def get_running_dag_runs_to_examine(cls, session: Session) -> ScalarResult[DagRu | |
| from airflow.models.backfill import BackfillDagRun | ||
| from airflow.models.dag import DagModel | ||
|
|
||
| query = ( | ||
| select(cls) | ||
| .with_hint(cls, "USE INDEX (idx_dag_run_running_dags)", dialect_name="mysql") | ||
| .where(cls.state == DagRunState.RUNNING) | ||
| .join(DagModel, DagModel.dag_id == cls.dag_id) | ||
| .join(BackfillDagRun, BackfillDagRun.dag_run_id == DagRun.id, isouter=True) | ||
| .where( | ||
| DagModel.is_paused == false(), | ||
| DagModel.is_stale == false(), | ||
| ) | ||
| .order_by( | ||
| nulls_first(cast("ColumnElement[Any]", BackfillDagRun.sort_ordinal), session=session), | ||
| nulls_first(cast("ColumnElement[Any]", cls.last_scheduling_decision), session=session), | ||
| cls.run_after, | ||
| def _get_dagrun_query( | ||
| filters: list[ColumnElement[bool]], order_by: list[SQLColumnExpression[Any]], limit: int | ||
| ): | ||
| return ( | ||
| select(DagRun) | ||
| .with_hint(DagRun, "USE INDEX (idx_dag_run_running_dags)", dialect_name="mysql") | ||
| .where(DagRun.state == DagRunState.RUNNING) | ||
| .join(DagModel, DagModel.dag_id == cls.dag_id) | ||
| .join(BackfillDagRun, BackfillDagRun.dag_run_id == DagRun.id, isouter=True) | ||
| .where(*filters) | ||
| .order_by(*order_by) | ||
| .limit(limit) | ||
| ) | ||
| .limit(cls.DEFAULT_DAGRUNS_TO_EXAMINE) | ||
|
|
||
| filters = [ | ||
| DagRun.run_after <= func.now(), | ||
| DagModel.is_paused == false(), | ||
| DagModel.is_stale == false(), | ||
| ] | ||
|
|
||
| order = [ | ||
| nulls_first(cast("ColumnElement[Any]", BackfillDagRun.sort_ordinal), session=session), | ||
| nulls_first(cast("ColumnElement[Any]", DagRun.last_scheduling_decision), session=session), | ||
| DagRun.run_after, | ||
| ] | ||
|
|
||
| new_dagruns_to_examine = cls.DEFAULT_NEW_DAGRUNS_TO_EXAMINE | ||
| dagruns_to_examine = cls.DEFAULT_DAGRUNS_TO_EXAMINE | ||
|
|
||
| if new_dagruns_to_examine < 0: | ||
| log.warning("'max_new_dagruns_per_loop_to_schedule' is smaller than 0, ignoring configuration") | ||
| new_dagruns_to_examine = 0 | ||
|
Comment on lines
+671
to
+676
|
||
|
|
||
| query = _get_dagrun_query( | ||
| filters=filters | ||
| if new_dagruns_to_examine == 0 | ||
| else [*filters, DagRun.last_scheduling_decision.is_not(None)], | ||
| order_by=order, | ||
| limit=dagruns_to_examine, | ||
| ) | ||
|
|
||
| query = query.where(DagRun.run_after <= func.now()) | ||
| result: Sequence[DagRun] = ( | ||
| session.scalars(with_row_locks(query, of=cls, session=session, skip_locked=True)).unique().all() | ||
| ) | ||
|
|
||
| if new_dagruns_to_examine > 0: | ||
| new_dagruns_query = _get_dagrun_query( | ||
| filters=[*filters, DagRun.last_scheduling_decision.is_(None)], | ||
| order_by=order, | ||
| limit=new_dagruns_to_examine, | ||
| ) | ||
| new_dagruns: Sequence[DagRun] = ( | ||
| session.scalars(with_row_locks(new_dagruns_query, of=cls, session=session, skip_locked=True)) | ||
| .unique() | ||
| .all() | ||
| ) | ||
|
|
||
| result = [*result, *new_dagruns] | ||
|
|
||
| result = session.scalars(with_row_locks(query, of=cls, session=session, skip_locked=True)).unique() | ||
| return result | ||
|
|
||
| @classmethod | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -993,6 +993,121 @@ def test_wait_for_downstream(self, dag_maker, session, prev_ti_state, is_ti_sche | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| schedulable_tis = [ti.task_id for ti in decision.schedulable_tis] | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| assert (upstream.task_id in schedulable_tis) == is_ti_schedulable | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| def test_get_running_dag_runs_ignores_new_dagruns_to_examine_when_smaller_than_0( | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| self, session, dag_maker | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| ): | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| DagRun.DEFAULT_NEW_DAGRUNS_TO_EXAMINE = 0 | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+997
to
+1000
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| self, session, dag_maker | |
| ): | |
| DagRun.DEFAULT_NEW_DAGRUNS_TO_EXAMINE = 0 | |
| self, session, dag_maker, monkeypatch | |
| ): | |
| monkeypatch.setattr(DagRun, "DEFAULT_NEW_DAGRUNS_TO_EXAMINE", 0) |
Copilot
AI
Apr 2, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test name implies it covers the "< 0" configuration path, but it sets DagRun.DEFAULT_NEW_DAGRUNS_TO_EXAMINE = 0, so the warning/clamping branch is never exercised. Set a negative value here (e.g. -1) and assert the expected warning (via caplog) to actually cover the behavior.
| self, session, dag_maker | |
| ): | |
| DagRun.DEFAULT_NEW_DAGRUNS_TO_EXAMINE = 0 | |
| def create_dagruns( | |
| last_scheduling_decision: datetime.datetime | None = None, | |
| count: int = 20, | |
| ): | |
| dagrun = dag_maker.create_dagrun( | |
| run_type=DagRunType.SCHEDULED, | |
| state=State.RUNNING, | |
| run_after=datetime.datetime(2024, 1, 1), | |
| ) | |
| dagrun.last_scheduling_decision = last_scheduling_decision | |
| session.merge(dagrun) | |
| for _ in range(count - 1): | |
| dagrun = dag_maker.create_dagrun_after( | |
| dagrun, | |
| run_type=DagRunType.SCHEDULED, | |
| state=State.RUNNING, | |
| run_after=datetime.datetime(2024, 1, 1), | |
| ) | |
| dagrun.last_scheduling_decision = last_scheduling_decision | |
| session.merge(dagrun) | |
| with dag_maker( | |
| dag_id="dummy_dag", | |
| schedule=datetime.timedelta(days=1), | |
| start_date=datetime.datetime(2024, 1, 1), | |
| session=session, | |
| ): | |
| EmptyOperator(task_id="dummy_task") | |
| create_dagruns(None, 10) | |
| with dag_maker( | |
| dag_id="dummy_dag2", | |
| schedule=datetime.timedelta(days=1), | |
| start_date=datetime.datetime(2024, 1, 1), | |
| session=session, | |
| ): | |
| EmptyOperator(task_id="dummy_task2") | |
| create_dagruns(func.now(), 20) | |
| session.flush() | |
| dagruns = list(DagRun.get_running_dag_runs_to_examine(session=session)) | |
| assert len([dagrun for dagrun in dagruns if dagrun.last_scheduling_decision is None]) == 10 | |
| assert len([dagrun for dagrun in dagruns if dagrun.last_scheduling_decision is not None]) == 10 | |
| self, session, dag_maker, caplog | |
| ): | |
| original_value = DagRun.DEFAULT_NEW_DAGRUNS_TO_EXAMINE | |
| try: | |
| # Set a negative value to exercise the "< 0" clamping and warning path. | |
| DagRun.DEFAULT_NEW_DAGRUNS_TO_EXAMINE = -1 | |
| # Capture warnings emitted when handling the negative configuration value. | |
| caplog.set_level("WARNING", logger="airflow.models.dagrun") | |
| def create_dagruns( | |
| last_scheduling_decision: datetime.datetime | None = None, | |
| count: int = 20, | |
| ): | |
| dagrun = dag_maker.create_dagrun( | |
| run_type=DagRunType.SCHEDULED, | |
| state=State.RUNNING, | |
| run_after=datetime.datetime(2024, 1, 1), | |
| ) | |
| dagrun.last_scheduling_decision = last_scheduling_decision | |
| session.merge(dagrun) | |
| for _ in range(count - 1): | |
| dagrun = dag_maker.create_dagrun_after( | |
| dagrun, | |
| run_type=DagRunType.SCHEDULED, | |
| state=State.RUNNING, | |
| run_after=datetime.datetime(2024, 1, 1), | |
| ) | |
| dagrun.last_scheduling_decision = last_scheduling_decision | |
| session.merge(dagrun) | |
| with dag_maker( | |
| dag_id="dummy_dag", | |
| schedule=datetime.timedelta(days=1), | |
| start_date=datetime.datetime(2024, 1, 1), | |
| session=session, | |
| ): | |
| EmptyOperator(task_id="dummy_task") | |
| create_dagruns(None, 10) | |
| with dag_maker( | |
| dag_id="dummy_dag2", | |
| schedule=datetime.timedelta(days=1), | |
| start_date=datetime.datetime(2024, 1, 1), | |
| session=session, | |
| ): | |
| EmptyOperator(task_id="dummy_task2") | |
| create_dagruns(func.now(), 20) | |
| session.flush() | |
| dagruns = list(DagRun.get_running_dag_runs_to_examine(session=session)) | |
| # Verify that the negative value was ignored/clamped by checking for the warning. | |
| assert any( | |
| "DEFAULT_NEW_DAGRUNS_TO_EXAMINE" in record.getMessage() | |
| and ("negative" in record.getMessage() or "< 0" in record.getMessage()) | |
| for record in caplog.records | |
| ) | |
| assert len([dagrun for dagrun in dagruns if dagrun.last_scheduling_decision is None]) == 10 | |
| assert len([dagrun for dagrun in dagruns if dagrun.last_scheduling_decision is not None]) == 10 | |
| finally: | |
| DagRun.DEFAULT_NEW_DAGRUNS_TO_EXAMINE = original_value |
Copilot
AI
Apr 2, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same issue here: DagRun.DEFAULT_NEW_DAGRUNS_TO_EXAMINE is modified without being restored, which can leak state across tests. Please scope this via monkeypatch or restore the previous value in a finally block.
| def test_get_running_dag_runs_with_max_new_dagruns_to_examine(self, session, dag_maker): | |
| DagRun.DEFAULT_NEW_DAGRUNS_TO_EXAMINE = 10 | |
| def test_get_running_dag_runs_with_max_new_dagruns_to_examine(self, session, dag_maker, monkeypatch): | |
| monkeypatch.setattr(DagRun, "DEFAULT_NEW_DAGRUNS_TO_EXAMINE", 10) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The warning message for negative
max_new_dagruns_per_loop_to_schedulecould be more actionable if it included the configured value and the fallback behavior (treated as 0). Also, “less than 0” reads more naturally than “smaller than 0”.