Skip to content

Commit

Permalink
Updating Google Cloud example DAGs to use XComArgs (#16875)
Browse files Browse the repository at this point in the history
  • Loading branch information
josh-fell committed Jul 26, 2021
1 parent aaf44cc commit 448e50b
Show file tree
Hide file tree
Showing 26 changed files with 271 additions and 177 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
task_id="create_dataset_task", dataset=DATASET, location=GCP_AUTOML_LOCATION
)

dataset_id = '{{ task_instance.xcom_pull("create_dataset_task", key="dataset_id") }}'
dataset_id = create_dataset_task.output['dataset_id']

import_dataset_task = AutoMLImportDataOperator(
task_id="import_dataset_task",
Expand All @@ -80,7 +80,7 @@

create_model = AutoMLTrainModelOperator(task_id="create_model", model=MODEL, location=GCP_AUTOML_LOCATION)

model_id = "{{ task_instance.xcom_pull('create_model', key='model_id') }}"
model_id = create_model.output['model_id']

delete_model_task = AutoMLDeleteModelOperator(
task_id="delete_model_task",
Expand All @@ -96,4 +96,10 @@
project_id=GCP_PROJECT_ID,
)

create_dataset_task >> import_dataset_task >> create_model >> delete_model_task >> delete_datasets_task
import_dataset_task >> create_model
delete_model_task >> delete_datasets_task

# Task dependencies created via `XComArgs`:
# create_dataset_task >> import_dataset_task
# create_dataset_task >> create_model
# create_model >> delete_model_task
# create_dataset_task >> delete_datasets_task
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
task_id="create_dataset_task", dataset=DATASET, location=GCP_AUTOML_LOCATION
)

dataset_id = '{{ task_instance.xcom_pull("create_dataset_task", key="dataset_id") }}'
dataset_id = create_dataset_task.output['dataset_id']

import_dataset_task = AutoMLImportDataOperator(
task_id="import_dataset_task",
Expand All @@ -80,7 +80,7 @@

create_model = AutoMLTrainModelOperator(task_id="create_model", model=MODEL, location=GCP_AUTOML_LOCATION)

model_id = "{{ task_instance.xcom_pull('create_model', key='model_id') }}"
model_id = create_model.output['model_id']

delete_model_task = AutoMLDeleteModelOperator(
task_id="delete_model_task",
Expand All @@ -96,4 +96,11 @@
project_id=GCP_PROJECT_ID,
)

create_dataset_task >> import_dataset_task >> create_model >> delete_model_task >> delete_datasets_task
import_dataset_task >> create_model
delete_model_task >> delete_datasets_task

# Task dependencies created via `XComArgs`:
# create_dataset_task >> import_dataset_task
# create_dataset_task >> create_model
# create_model >> delete_model_task
# create_dataset_task >> delete_datasets_task
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
task_id="create_dataset_task", dataset=DATASET, location=GCP_AUTOML_LOCATION
)

dataset_id = '{{ task_instance.xcom_pull("create_dataset_task", key="dataset_id") }}'
dataset_id = create_dataset_task.output['dataset_id']

import_dataset_task = AutoMLImportDataOperator(
task_id="import_dataset_task",
Expand All @@ -81,7 +81,7 @@

create_model = AutoMLTrainModelOperator(task_id="create_model", model=MODEL, location=GCP_AUTOML_LOCATION)

model_id = "{{ task_instance.xcom_pull('create_model', key='model_id') }}"
model_id = create_model.output['model_id']

delete_model_task = AutoMLDeleteModelOperator(
task_id="delete_model_task",
Expand All @@ -97,4 +97,11 @@
project_id=GCP_PROJECT_ID,
)

create_dataset_task >> import_dataset_task >> create_model >> delete_model_task >> delete_datasets_task
import_dataset_task >> create_model
delete_model_task >> delete_datasets_task

# Task dependencies created via `XComArgs`:
# create_dataset_task >> import_dataset_task
# create_dataset_task >> create_model
# create_model >> delete_model_task
# create_dataset_task >> delete_datasets_task
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def get_target_column_spec(columns_specs: List[Dict], column_name: str) -> str:
project_id=GCP_PROJECT_ID,
)

dataset_id = "{{ task_instance.xcom_pull('create_dataset_task', key='dataset_id') }}"
dataset_id = create_dataset_task.output['dataset_id']
# [END howto_operator_automl_create_dataset]

MODEL["dataset_id"] = dataset_id
Expand Down Expand Up @@ -156,7 +156,7 @@ def get_target_column_spec(columns_specs: List[Dict], column_name: str) -> str:
project_id=GCP_PROJECT_ID,
)

model_id = "{{ task_instance.xcom_pull('create_model_task', key='model_id') }}"
model_id = create_model_task.output['model_id']
# [END howto_operator_automl_create_model]

# [START howto_operator_automl_delete_model]
Expand All @@ -176,15 +176,21 @@ def get_target_column_spec(columns_specs: List[Dict], column_name: str) -> str:
)

(
create_dataset_task
>> import_dataset_task
import_dataset_task
>> list_tables_spec_task
>> list_columns_spec_task
>> update_dataset_task
>> create_model_task
>> delete_model_task
>> delete_datasets_task
)
delete_model_task >> delete_datasets_task

# Task dependencies created via `XComArgs`:
# create_dataset_task >> import_dataset_task
# create_dataset_task >> list_tables_spec_task
# create_dataset_task >> list_columns_spec_task
# create_dataset_task >> create_model_task
# create_model_task >> delete_model_task
# create_dataset_task >> delete_datasets_task


# Example DAG for AutoML datasets operations
Expand All @@ -201,7 +207,7 @@ def get_target_column_spec(columns_specs: List[Dict], column_name: str) -> str:
project_id=GCP_PROJECT_ID,
)

dataset_id = '{{ task_instance.xcom_pull("create_dataset_task", key="dataset_id") }}'
dataset_id = create_dataset_task.output['dataset_id']

import_dataset_task = AutoMLImportDataOperator(
task_id="import_dataset_task",
Expand Down Expand Up @@ -243,14 +249,19 @@ def get_target_column_spec(columns_specs: List[Dict], column_name: str) -> str:
# [END howto_operator_delete_dataset]

(
create_dataset_task
>> import_dataset_task
import_dataset_task
>> list_tables_spec_task
>> list_columns_spec_task
>> list_datasets_task
>> delete_datasets_task
)

# Task dependencies created via `XComArgs`:
# create_dataset_task >> import_dataset_task
# create_dataset_task >> list_tables_spec_task
# create_dataset_task >> list_columns_spec_task


with models.DAG(
"example_gcp_get_deploy",
schedule_interval=None, # Override to match your needs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
task_id="create_dataset_task", dataset=DATASET, location=GCP_AUTOML_LOCATION
)

dataset_id = '{{ task_instance.xcom_pull("create_dataset_task", key="dataset_id") }}'
dataset_id = create_dataset_task.output["dataset_id"]

import_dataset_task = AutoMLImportDataOperator(
task_id="import_dataset_task",
Expand All @@ -87,7 +87,7 @@

create_model = AutoMLTrainModelOperator(task_id="create_model", model=MODEL, location=GCP_AUTOML_LOCATION)

model_id = "{{ task_instance.xcom_pull('create_model', key='model_id') }}"
model_id = create_model.output["model_id"]

delete_model_task = AutoMLDeleteModelOperator(
task_id="delete_model_task",
Expand All @@ -103,4 +103,11 @@
project_id=GCP_PROJECT_ID,
)

create_dataset_task >> import_dataset_task >> create_model >> delete_model_task >> delete_datasets_task
import_dataset_task >> create_model
delete_model_task >> delete_datasets_task

# Task dependencies created via `XComArgs`:
# create_dataset_task >> import_dataset_task
# create_dataset_task >> create_model
# create_model >> delete_model_task
# create_dataset_task >> delete_datasets_task
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
task_id="create_dataset_task", dataset=DATASET, location=GCP_AUTOML_LOCATION
)

dataset_id = '{{ task_instance.xcom_pull("create_dataset_task", key="dataset_id") }}'
dataset_id = create_dataset_task.output["dataset_id"]

import_dataset_task = AutoMLImportDataOperator(
task_id="import_dataset_task",
Expand All @@ -84,7 +84,7 @@

create_model = AutoMLTrainModelOperator(task_id="create_model", model=MODEL, location=GCP_AUTOML_LOCATION)

model_id = "{{ task_instance.xcom_pull('create_model', key='model_id') }}"
model_id = create_model.output["model_id"]

delete_model_task = AutoMLDeleteModelOperator(
task_id="delete_model_task",
Expand All @@ -100,4 +100,11 @@
project_id=GCP_PROJECT_ID,
)

create_dataset_task >> import_dataset_task >> create_model >> delete_model_task >> delete_datasets_task
import_dataset_task >> create_model
delete_model_task >> delete_datasets_task

# Task dependencies created via `XComArgs`:
# create_dataset_task >> import_dataset_task
# create_dataset_task >> create_model
# create_model >> delete_model_task
# create_dataset_task >> delete_datasets_task
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
task_id="create_dataset_task", dataset=DATASET, location=GCP_AUTOML_LOCATION
)

dataset_id = '{{ task_instance.xcom_pull("create_dataset_task", key="dataset_id") }}'
dataset_id = create_dataset_task.output["dataset_id"]

import_dataset_task = AutoMLImportDataOperator(
task_id="import_dataset_task",
Expand All @@ -85,7 +85,7 @@

create_model = AutoMLTrainModelOperator(task_id="create_model", model=MODEL, location=GCP_AUTOML_LOCATION)

model_id = "{{ task_instance.xcom_pull('create_model', key='model_id') }}"
model_id = create_model.output["model_id"]

delete_model_task = AutoMLDeleteModelOperator(
task_id="delete_model_task",
Expand All @@ -101,4 +101,11 @@
project_id=GCP_PROJECT_ID,
)

create_dataset_task >> import_dataset_task >> create_model >> delete_model_task >> delete_datasets_task
import_dataset_task >> create_model
delete_model_task >> delete_datasets_task

# Task dependencies created via `XComArgs`:
# create_dataset_task >> import_dataset_task
# create_dataset_task >> create_model
# create_model >> delete_model_task
# create_dataset_task >> delete_datasets_task
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
task_id="create_dataset_task", dataset=DATASET, location=GCP_AUTOML_LOCATION
)

dataset_id = '{{ task_instance.xcom_pull("create_dataset_task", key="dataset_id") }}'
dataset_id = create_dataset_task.output["dataset_id"]

import_dataset_task = AutoMLImportDataOperator(
task_id="import_dataset_task",
Expand All @@ -82,7 +82,7 @@

create_model = AutoMLTrainModelOperator(task_id="create_model", model=MODEL, location=GCP_AUTOML_LOCATION)

model_id = "{{ task_instance.xcom_pull('create_model', key='model_id') }}"
model_id = create_model.output["model_id"]

delete_model_task = AutoMLDeleteModelOperator(
task_id="delete_model_task",
Expand All @@ -98,4 +98,11 @@
project_id=GCP_PROJECT_ID,
)

create_dataset_task >> import_dataset_task >> create_model >> delete_model_task >> delete_datasets_task
import_dataset_task >> create_model
delete_model_task >> delete_datasets_task

# Task dependencies created via `XComArgs`:
# create_dataset_task >> import_dataset_task
# create_dataset_task >> create_model
# create_model >> delete_model_task
# create_dataset_task >> delete_datasets_task
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
task_id="create_dataset_task", dataset=DATASET, location=GCP_AUTOML_LOCATION
)

dataset_id = '{{ task_instance.xcom_pull("create_dataset_task", key="dataset_id") }}'
dataset_id = create_dataset_task.output["dataset_id"]

import_dataset_task = AutoMLImportDataOperator(
task_id="import_dataset_task",
Expand All @@ -84,7 +84,7 @@

create_model = AutoMLTrainModelOperator(task_id="create_model", model=MODEL, location=GCP_AUTOML_LOCATION)

model_id = "{{ task_instance.xcom_pull('create_model', key='model_id') }}"
model_id = create_model.output["model_id"]

delete_model_task = AutoMLDeleteModelOperator(
task_id="delete_model_task",
Expand All @@ -100,4 +100,11 @@
project_id=GCP_PROJECT_ID,
)

create_dataset_task >> import_dataset_task >> create_model >> delete_model_task >> delete_datasets_task
import_dataset_task >> create_model
delete_model_task >> delete_datasets_task

# Task dependencies created via `XComArgs`:
# create_dataset_task >> import_dataset_task
# create_dataset_task >> create_model
# create_model >> delete_model_task
# create_dataset_task >> delete_datasets_task
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,18 @@
AZURE_SHARE_NAME = os.environ.get('AZURE_SHARE_NAME', 'test-azure-share')
AZURE_DIRECTORY_NAME = "test-azure-dir"

default_args = {
'owner': 'airflow',
'depends_on_past': False,
'email': ['airflow@example.com'],
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': timedelta(minutes=5),
}

with DAG(
dag_id='azure_fileshare_to_gcs_example',
default_args=default_args,
default_args={
'owner': 'airflow',
'depends_on_past': False,
'email': ['airflow@example.com'],
'email_on_failure': False,
'email_on_retry': False,
'retries': 1,
'retry_delay': timedelta(minutes=5),
},
schedule_interval=None,
start_date=datetime(2018, 11, 1),
tags=['example'],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,7 @@
task_id="gcp_bigquery_create_transfer",
)

transfer_config_id = (
"{{ task_instance.xcom_pull('gcp_bigquery_create_transfer', key='transfer_config_id') }}"
)
transfer_config_id = gcp_bigquery_create_transfer.output["transfer_config_id"]
# [END howto_bigquery_create_data_transfer]

# [START howto_bigquery_start_transfer]
Expand All @@ -86,14 +84,13 @@
transfer_config_id=transfer_config_id,
requested_run_time={"seconds": int(time.time() + 60)},
)
run_id = "{{ task_instance.xcom_pull('gcp_bigquery_start_transfer', key='run_id') }}"
# [END howto_bigquery_start_transfer]

# [START howto_bigquery_dts_sensor]
gcp_run_sensor = BigQueryDataTransferServiceTransferRunSensor(
task_id="gcp_run_sensor",
transfer_config_id=transfer_config_id,
run_id=run_id,
run_id=gcp_bigquery_start_transfer.output["run_id"],
expected_statuses={"SUCCEEDED"},
)
# [END howto_bigquery_dts_sensor]
Expand All @@ -104,9 +101,10 @@
)
# [END howto_bigquery_delete_data_transfer]

(
gcp_bigquery_create_transfer
>> gcp_bigquery_start_transfer
>> gcp_run_sensor
>> gcp_bigquery_delete_transfer
)
gcp_run_sensor >> gcp_bigquery_delete_transfer

# Task dependencies created via `XComArgs`:
# gcp_bigquery_create_transfer >> gcp_bigquery_start_transfer
# gcp_bigquery_create_transfer >> gcp_run_sensor
# gcp_bigquery_start_transfer >> gcp_run_sensor
# gcp_bigquery_create_transfer >> gcp_bigquery_delete_transfer
Loading

0 comments on commit 448e50b

Please sign in to comment.