Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ch05 - editor comments #7

Merged
merged 9 commits into from
Jul 5, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions chapter05/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
AIRFLOW_UID=1000
AIRFLOW_GID=0
30 changes: 30 additions & 0 deletions chapter05/dags/01_rocket_pipeline_dependencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""
Listing: 5.3, 5.4, 5.5, 5.6
Figure: 5.3
"""
import pendulum
from airflow import DAG
from airflow.operators.empty import EmptyOperator

with DAG(
    dag_id="01_rocket_pipeline_dependencies",
    start_date=pendulum.today("UTC").add(days=-3),
    schedule="@daily",
):
    # One no-op placeholder task per pipeline stage; the task_id is both the
    # node name in the Airflow graph view and our lookup key below.
    tasks = {
        name: EmptyOperator(task_id=name)
        for name in (
            "start",
            "fetch_sales",
            "clean_sales",
            "fetch_weather",
            "clean_weather",
            "join_datasets",
            "train_model",
            "deploy_model",
        )
    }

    # Fan out from start into two independent branches (sales and weather),
    # each of which fetches and then cleans its own dataset.
    tasks["start"] >> [tasks["fetch_sales"], tasks["fetch_weather"]]
    tasks["fetch_sales"] >> tasks["clean_sales"]
    tasks["fetch_weather"] >> tasks["clean_weather"]

    # Fan back in: both cleaned datasets are required before the join,
    # after which training and deployment run strictly in sequence.
    [tasks["clean_sales"], tasks["clean_weather"]] >> tasks["join_datasets"]
    tasks["join_datasets"] >> tasks["train_model"] >> tasks["deploy_model"]
26 changes: 0 additions & 26 deletions chapter05/dags/01_start.py

This file was deleted.

27 changes: 16 additions & 11 deletions chapter05/dags/02_branch_function.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
"""
Listing: 5.7, 5.8
Figure: 5.6
"""

import pendulum
from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.operators.empty import EmptyOperator
from airflow.operators.python import PythonOperator

ERP_CHANGE_DATE = pendulum.today("UTC").add(days=-1)


def _fetch_sales(**context):
if context["execution_date"] < ERP_CHANGE_DATE:
if context["data_interval_start"] < ERP_CHANGE_DATE:
_fetch_sales_old(**context)
else:
_fetch_sales_new(**context)
Expand All @@ -22,7 +27,7 @@ def _fetch_sales_new(**context):


def _clean_sales(**context):
if context["execution_date"] < ERP_CHANGE_DATE:
if context["data_interval_start"] < ERP_CHANGE_DATE:
_clean_sales_old(**context)
else:
_clean_sales_new(**context)
Expand All @@ -39,19 +44,19 @@ def _clean_sales_new(**context):
with DAG(
dag_id="02_branch_function",
start_date=pendulum.today("UTC").add(days=-3),
schedule_interval="@daily",
) as dag:
start = DummyOperator(task_id="start")
schedule="@daily",
):
start = EmptyOperator(task_id="start")

fetch_sales = PythonOperator(task_id="fetch_sales", python_callable=_fetch_sales)
clean_sales = PythonOperator(task_id="clean_sales", python_callable=_clean_sales)

fetch_weather = DummyOperator(task_id="fetch_weather")
clean_weather = DummyOperator(task_id="clean_weather")
fetch_weather = EmptyOperator(task_id="fetch_weather")
clean_weather = EmptyOperator(task_id="clean_weather")

join_datasets = DummyOperator(task_id="join_datasets")
train_model = DummyOperator(task_id="train_model")
deploy_model = DummyOperator(task_id="deploy_model")
join_datasets = EmptyOperator(task_id="join_datasets")
train_model = EmptyOperator(task_id="train_model")
deploy_model = EmptyOperator(task_id="deploy_model")

start >> [fetch_sales, fetch_weather]
fetch_sales >> clean_sales
Expand Down
60 changes: 0 additions & 60 deletions chapter05/dags/02_branch_task.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
"""
Listing: 5.9, 5.10, 5.11, 5.12, 5.13, 5.14
Figure: 5.8, 5.9
"""

import pendulum
from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.operators.empty import EmptyOperator
from airflow.operators.python import BranchPythonOperator, PythonOperator

ERP_CHANGE_DATE = pendulum.today("UTC").add(days=-1)


def _pick_erp_system(**context):
if context["execution_date"] < ERP_CHANGE_DATE:
if context["data_interval_start"] < ERP_CHANGE_DATE:
return "fetch_sales_old"
else:
return "fetch_sales_new"
Expand All @@ -30,11 +35,11 @@ def _clean_sales_new(**context):


with DAG(
dag_id="03_branch_dag",
dag_id="03_branch_dag_old_new",
start_date=pendulum.today("UTC").add(days=-3),
schedule_interval="@daily",
) as dag:
start = DummyOperator(task_id="start")
schedule="@daily",
):
start = EmptyOperator(task_id="start")

pick_erp_system = BranchPythonOperator(task_id="pick_erp_system", python_callable=_pick_erp_system)

Expand All @@ -44,15 +49,15 @@ def _clean_sales_new(**context):
fetch_sales_new = PythonOperator(task_id="fetch_sales_new", python_callable=_fetch_sales_new)
clean_sales_new = PythonOperator(task_id="clean_sales_new", python_callable=_clean_sales_new)

fetch_weather = DummyOperator(task_id="fetch_weather")
clean_weather = DummyOperator(task_id="clean_weather")
fetch_weather = EmptyOperator(task_id="fetch_weather")
clean_weather = EmptyOperator(task_id="clean_weather")

# Using the wrong trigger rule ("all_success") results in tasks being skipped downstream.
# join_datasets = DummyOperator(task_id="join_datasets")
# join_datasets = EmptyOperator(task_id="join_datasets")

join_datasets = DummyOperator(task_id="join_datasets", trigger_rule="none_failed")
train_model = DummyOperator(task_id="train_model")
deploy_model = DummyOperator(task_id="deploy_model")
join_datasets = EmptyOperator(task_id="join_datasets", trigger_rule="none_failed")
train_model = EmptyOperator(task_id="train_model")
deploy_model = EmptyOperator(task_id="deploy_model")

start >> [pick_erp_system, fetch_weather]
pick_erp_system >> [fetch_sales_old, fetch_sales_new]
Expand Down
31 changes: 18 additions & 13 deletions chapter05/dags/04_branch_dag_join.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
"""
Listing: 5.15
Figure: 5.10
"""

import pendulum
from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.operators.empty import EmptyOperator
from airflow.operators.python import BranchPythonOperator, PythonOperator

ERP_CHANGE_DATE = pendulum.today("UTC").add(days=-1)


def _pick_erp_system(**context):
if context["execution_date"] < ERP_CHANGE_DATE:
if context["data_interval_start"] < ERP_CHANGE_DATE:
return "fetch_sales_old"
else:
return "fetch_sales_new"
Expand All @@ -32,9 +37,9 @@ def _clean_sales_new(**context):
with DAG(
dag_id="04_branch_dag_join",
start_date=pendulum.today("UTC").add(days=-3),
schedule_interval="@daily",
) as dag:
start = DummyOperator(task_id="start")
schedule="@daily",
):
start = EmptyOperator(task_id="start")

pick_erp_system = BranchPythonOperator(task_id="pick_erp_system", python_callable=_pick_erp_system)

Expand All @@ -44,20 +49,20 @@ def _clean_sales_new(**context):
fetch_sales_new = PythonOperator(task_id="fetch_sales_new", python_callable=_fetch_sales_new)
clean_sales_new = PythonOperator(task_id="clean_sales_new", python_callable=_clean_sales_new)

join_erp = DummyOperator(task_id="join_erp_branch", trigger_rule="none_failed")
join_branch = EmptyOperator(task_id="join_erp_branch", trigger_rule="none_failed")

fetch_weather = DummyOperator(task_id="fetch_weather")
clean_weather = DummyOperator(task_id="clean_weather")
fetch_weather = EmptyOperator(task_id="fetch_weather")
clean_weather = EmptyOperator(task_id="clean_weather")

join_datasets = DummyOperator(task_id="join_datasets")
train_model = DummyOperator(task_id="train_model")
deploy_model = DummyOperator(task_id="deploy_model")
join_datasets = EmptyOperator(task_id="join_datasets")
train_model = EmptyOperator(task_id="train_model")
deploy_model = EmptyOperator(task_id="deploy_model")

start >> [pick_erp_system, fetch_weather]
pick_erp_system >> [fetch_sales_old, fetch_sales_new]
fetch_sales_old >> clean_sales_old
fetch_sales_new >> clean_sales_new
[clean_sales_old, clean_sales_new] >> join_erp
[clean_sales_old, clean_sales_new] >> join_branch
fetch_weather >> clean_weather
[join_erp, clean_weather] >> join_datasets
[join_branch, clean_weather] >> join_datasets
join_datasets >> train_model >> deploy_model
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
"""
Listing: 5.3, 5.4, 5.5, 5.6
Figure: 5.3
"""


import pendulum
from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.operators.empty import EmptyOperator
from airflow.operators.python import BranchPythonOperator, PythonOperator

ERP_CHANGE_DATE = pendulum.today("UTC").add(days=-1)


def _pick_erp_system(**context):
if context["execution_date"] < ERP_CHANGE_DATE:
if context["data_interval_start"] < ERP_CHANGE_DATE:
return "fetch_sales_old"
else:
return "fetch_sales_new"
Expand All @@ -20,33 +26,33 @@ def _deploy_model(**context):

def _is_latest_run(**context):
now = pendulum.now("UTC")
left_window = context["dag"].following_schedule(context["execution_date"])
left_window = context["dag"].following_schedule(context["data_interval_start"])
right_window = context["dag"].following_schedule(left_window)
return left_window < now <= right_window


with DAG(
dag_id="05_condition_function",
start_date=pendulum.today("UTC").add(days=-3),
schedule_interval="@daily",
) as dag:
start = DummyOperator(task_id="start")
schedule="@daily",
):
start = EmptyOperator(task_id="start")

pick_erp = BranchPythonOperator(task_id="pick_erp_system", python_callable=_pick_erp_system)

fetch_sales_old = DummyOperator(task_id="fetch_sales_old")
clean_sales_old = DummyOperator(task_id="clean_sales_old")
fetch_sales_old = EmptyOperator(task_id="fetch_sales_old")
clean_sales_old = EmptyOperator(task_id="clean_sales_old")

fetch_sales_new = DummyOperator(task_id="fetch_sales_new")
clean_sales_new = DummyOperator(task_id="clean_sales_new")
fetch_sales_new = EmptyOperator(task_id="fetch_sales_new")
clean_sales_new = EmptyOperator(task_id="clean_sales_new")

join_erp = DummyOperator(task_id="join_erp_branch", trigger_rule="none_failed")
join_erp = EmptyOperator(task_id="join_erp_branch", trigger_rule="none_failed")

fetch_weather = DummyOperator(task_id="fetch_weather")
clean_weather = DummyOperator(task_id="clean_weather")
fetch_weather = EmptyOperator(task_id="fetch_weather")
clean_weather = EmptyOperator(task_id="clean_weather")

join_datasets = DummyOperator(task_id="join_datasets")
train_model = DummyOperator(task_id="train_model")
join_datasets = EmptyOperator(task_id="join_datasets")
train_model = EmptyOperator(task_id="train_model")

deploy_model = PythonOperator(task_id="deploy_model", python_callable=_deploy_model)

Expand Down
Loading