-
Notifications
You must be signed in to change notification settings - Fork 17.1k
[AIRFLOW-558] Add Support for dag.backfill=(True|False) Option #1830
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1023,12 +1023,28 @@ def are_dependents_done(self, session=None): | |
| @provide_session | ||
| def previous_ti(self, session=None): | ||
| """ The task instance for the task that ran before this task instance """ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Much cleaner. Thanks!
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Going to have to dig into this. Apparently, moving to a DR-based previous_ti causes issues when NOT using a DR (backfill). Finally figured that out this afternoon. Will attempt to fix tonight. |
||
| return session.query(TaskInstance).filter( | ||
| TaskInstance.dag_id == self.dag_id, | ||
| TaskInstance.task_id == self.task.task_id, | ||
| TaskInstance.execution_date == | ||
| self.task.dag.previous_schedule(self.execution_date), | ||
| ).first() | ||
|
|
||
| dag = self.task.dag | ||
| if dag: | ||
| dr = self.get_dagrun(session=session) | ||
| if not dr: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I understand why you are doing this, although I think the comment does not reflect the requirement. Please mark it as FIXME (it should be removed in the future / throw an exception). |
||
| # Means that this TI is NOT being run from a DR, but from a catchup | ||
| previous_scheduled_date = dag.previous_schedule(self.execution_date) | ||
| if not previous_scheduled_date: | ||
| return None | ||
| else: | ||
| return TaskInstance(task=self.task, execution_date=previous_scheduled_date) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This means that whether or not there is a "previous_scheduled_date" (i.e. there won't be one by convention before start_date), one always gets a TaskInstance. I don't think that should be the case. |
||
|
|
||
| if dag.catchup: | ||
| last_dagrun = dr.get_previous_scheduled_dagrun(session=session) if dr else None | ||
|
|
||
| else: | ||
| last_dagrun = dr.get_previous_dagrun(session=session) if dr else None | ||
|
|
||
| if last_dagrun: | ||
| return last_dagrun.get_task_instance(self.task_id, session=session) | ||
|
|
||
| return None | ||
|
|
||
| @provide_session | ||
| def are_dependencies_met( | ||
|
|
@@ -2540,6 +2556,8 @@ class DAG(BaseDag, LoggingMixin): | |
| :type sla_miss_callback: types.FunctionType | ||
| :param orientation: Specify DAG orientation in graph view (LR, TB, RL, BT) | ||
| :type orientation: string | ||
| :param catchup: Perform scheduler catchup (or only run latest)? Defaults to True | ||
| :type catchup: bool | ||
| """ | ||
|
|
||
| def __init__( | ||
|
|
@@ -2557,6 +2575,7 @@ def __init__( | |
| dagrun_timeout=None, | ||
| sla_miss_callback=None, | ||
| orientation=configuration.get('webserver', 'dag_orientation'), | ||
| catchup=configuration.getboolean('scheduler', 'catchup_by_default'), | ||
| params=None): | ||
|
|
||
| self.user_defined_macros = user_defined_macros | ||
|
|
@@ -2597,6 +2616,7 @@ def __init__( | |
| self.dagrun_timeout = dagrun_timeout | ||
| self.sla_miss_callback = sla_miss_callback | ||
| self.orientation = orientation | ||
| self.catchup = catchup | ||
|
|
||
| self._comps = { | ||
| 'dag_id', | ||
|
|
@@ -3847,6 +3867,29 @@ def get_dag(self): | |
|
|
||
| return self.dag | ||
|
|
||
| @provide_session | ||
| def get_previous_dagrun(self, session=None): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please rename to "get_previous"; it is already part of DagRun (or even make it a property). |
||
| """The previous DagRun, if there is one""" | ||
|
|
||
| return session.query(DagRun).filter( | ||
| DagRun.dag_id == self.dag_id, | ||
| DagRun.execution_date < self.execution_date | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. While it is outside the scope of this PR, this is potentially quite an expensive operation on the DB. |
||
| ).order_by( | ||
| DagRun.execution_date.desc() | ||
| ).first() | ||
|
|
||
| @provide_session | ||
| def get_previous_scheduled_dagrun(self, session=None): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure I understand the difference between get_previous_dagrun and get_previous_scheduled_dagrun. Shouldn't get_previous_dagrun return the same as get_previous_scheduled_dagrun? Or do you want to distinguish between backfilled and intervalled dag runs? Please rename to "get_previous_scheduled"; it is already part of DagRun. |
||
| """The previous, SCHEDULED DagRun, if there is one""" | ||
|
|
||
| if not self.dag: | ||
| return None | ||
|
|
||
| return session.query(DagRun).filter( | ||
| DagRun.dag_id == self.dag_id, | ||
| DagRun.execution_date == self.dag.previous_schedule(self.execution_date) | ||
| ).first() | ||
|
|
||
| @provide_session | ||
| def update_state(self, session=None): | ||
| """ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a duplicate from above