Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Order triggers by - TI priority_weight when assigning unassigned triggers #32318

Merged
merged 6 commits into from Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion airflow/models/trigger.py
Expand Up @@ -241,8 +241,9 @@ def assign_unassigned(cls, triggerer_id, capacity, heartrate, session: Session =
def get_sorted_triggers(cls, capacity, alive_triggerer_ids, session):
return with_row_locks(
session.query(cls.id)
.join(TaskInstance, cls.id == TaskInstance.trigger_id, isouter=True)
.filter(or_(cls.triggerer_id.is_(None), cls.triggerer_id.notin_(alive_triggerer_ids)))
.order_by(cls.created_date)
.order_by(-TaskInstance.priority_weight, cls.created_date)
Copy link
Member

@uranusjr uranusjr Jul 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
.order_by(-TaskInstance.priority_weight, cls.created_date)
.order_by(TaskInstance.priority_weight.desc(), cls.created_date)

Preferred style.

Can this be NULL and how do we want to handle them?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For NULL, I can use TaskInstance.priority_weight.desc().nulls_last(), but it seems that the row lock (with_for_update) doesn't work with the outer join. I will try to find a solution for that.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use CASE instead? We don’t really need to put them last, just probably not at the very beginning.

.limit(capacity),
session,
skip_locked=True,
Expand Down
88 changes: 79 additions & 9 deletions tests/models/test_trigger.py
Expand Up @@ -243,31 +243,101 @@ def test_assign_unassigned_missing_heartbeat(session, create_task_instance, chec
second_triggerer.latest_heartbeat += datetime.timedelta(seconds=check_triggerer_heartrate)


def test_get_sorted_triggers(session, create_task_instance):
def test_get_sorted_triggers_same_priority_weight(session, create_task_instance):
"""
Tests that triggers are sorted by the creation_date.
Tests that triggers are sorted by the creation_date if they have the same priority.
"""
old_execution_date = datetime.datetime(
2023, 5, 9, 12, 16, 14, 474415, tzinfo=pytz.timezone("Africa/Abidjan")
)
trigger_old = Trigger(
classpath="airflow.triggers.testing.SuccessTrigger",
kwargs={},
created_date=datetime.datetime(
2023, 5, 9, 12, 16, 14, 474415, tzinfo=pytz.timezone("Africa/Abidjan")
),
created_date=old_execution_date + datetime.timedelta(seconds=30),
)
trigger_old.id = 1
session.add(trigger_old)
TI_old = create_task_instance(
task_id="old",
execution_date=old_execution_date,
run_id="old_run_id",
)
TI_old.priority_weight = 1
TI_old.trigger_id = trigger_old.id
session.add(TI_old)

new_execution_date = datetime.datetime(
2023, 5, 9, 12, 17, 14, 474415, tzinfo=pytz.timezone("Africa/Abidjan")
)
trigger_new = Trigger(
classpath="airflow.triggers.testing.SuccessTrigger",
kwargs={},
created_date=datetime.datetime(
2023, 5, 9, 12, 17, 14, 474415, tzinfo=pytz.timezone("Africa/Abidjan")
),
created_date=new_execution_date + datetime.timedelta(seconds=30),
)
trigger_new.id = 2
session.add(trigger_old)
session.add(trigger_new)
TI_new = create_task_instance(
task_id="new",
execution_date=new_execution_date,
run_id="new_run_id",
)
TI_new.priority_weight = 1
TI_new.trigger_id = trigger_new.id
session.add(TI_new)

session.commit()
assert session.query(Trigger).count() == 2

trigger_ids_query = Trigger.get_sorted_triggers(capacity=100, alive_triggerer_ids=[], session=session)

assert trigger_ids_query == [(1,), (2,)]


def test_get_sorted_triggers_different_priority_weights(session, create_task_instance):
    """
    Check that the trigger linked to the higher priority_weight is returned first.

    Trigger 1 has the earlier created_date and is linked to a task instance with
    priority_weight 1; trigger 2 has the later created_date and is linked to a task
    instance with priority_weight 2. The sorted result must list trigger 2 before
    trigger 1.
    """
    abidjan_tz = pytz.timezone("Africa/Abidjan")
    creation_delay = datetime.timedelta(seconds=30)

    def _add_trigger_with_task_instance(trigger_id, task_id, run_id, execution_date, priority_weight):
        # The trigger is added to the session before its task instance is created,
        # and the task instance is then pointed at the trigger by id.
        trigger = Trigger(
            classpath="airflow.triggers.testing.SuccessTrigger",
            kwargs={},
            created_date=execution_date + creation_delay,
        )
        trigger.id = trigger_id
        session.add(trigger)

        task_instance = create_task_instance(
            task_id=task_id,
            execution_date=execution_date,
            run_id=run_id,
        )
        task_instance.priority_weight = priority_weight
        task_instance.trigger_id = trigger.id
        session.add(task_instance)

    # Earlier trigger, lower priority.
    _add_trigger_with_task_instance(
        trigger_id=1,
        task_id="old",
        run_id="old_run_id",
        execution_date=datetime.datetime(2023, 5, 9, 12, 16, 14, 474415, tzinfo=abidjan_tz),
        priority_weight=1,
    )
    # Later trigger, higher priority.
    _add_trigger_with_task_instance(
        trigger_id=2,
        task_id="new",
        run_id="new_run_id",
        execution_date=datetime.datetime(2023, 5, 9, 12, 17, 14, 474415, tzinfo=abidjan_tz),
        priority_weight=2,
    )

    session.commit()
    assert session.query(Trigger).count() == 2

    sorted_trigger_ids = Trigger.get_sorted_triggers(capacity=100, alive_triggerer_ids=[], session=session)

    # Priority weight takes precedence over creation order.
    assert sorted_trigger_ids == [(2,), (1,)]