<a href="https://colab.research.google.com/github/brendanlooker/colab-examples/blob/main/dataform/Composer_CF%26DF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Composer DAG that calls Cloud Functions before and after executing a Dataform pipeline


import logging
import urllib
import urllib.request
from datetime import datetime, timedelta

import google.auth.transport.requests
import google.oauth2.id_token
from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.operators.python_operator import PythonOperator
from airflow.providers.google.cloud.operators.dataform import (
    DataformCancelWorkflowInvocationOperator,
    DataformCreateCompilationResultOperator,
    DataformCreateWorkflowInvocationOperator,
    DataformGetCompilationResultOperator,
    DataformGetWorkflowInvocationOperator,
)
from airflow.providers.google.cloud.transfers.gcs_to_bigquery import GCSToBigQueryOperator
from airflow.utils.dates import days_ago

# Configure the root logger so INFO-level records (and above) are emitted.
logging.basicConfig(level=logging.INFO)

# Settings passed to the Dataform operators further down in this file.
PROJECT_ID = ""  # left empty here; fill in the target GCP project id
REGION = "europe-west1"
REPOSITORY_ID = "repo_name"
GIT_COMMITISH = "master"  # git ref used when creating the compilation result


def make_authorized_get_request(endpoint, audience):
    """
    Make an authenticated GET request to the specified HTTP endpoint.

    An ID token for ``audience`` is obtained through the google-auth client
    library and sent as a ``Bearer`` token in the ``Authorization`` header.

    Args:
        endpoint: URL the GET request is sent to.
        audience: Audience value the ID token is requested for.

    Returns:
        The response body decoded as UTF-8 text.

    Raises:
        urllib.error.URLError: If the request fails; an HTTP error status
            is raised as its subclass ``urllib.error.HTTPError``.
        Any exception raised by ``google.oauth2.id_token.fetch_id_token``
        when no ID token can be obtained is propagated unchanged.
    """

    req = urllib.request.Request(endpoint)

    auth_req = google.auth.transport.requests.Request()
    id_token = google.oauth2.id_token.fetch_id_token(auth_req, audience)

    req.add_header("Authorization", f"Bearer {id_token}")

    # The context manager closes the HTTP response (and its socket) even if
    # reading or decoding the body raises, so the connection is not leaked.
    with urllib.request.urlopen(req) as response:
        return response.read().decode('utf-8')

def invoke_cloud_function(url, audience):
    """
    Call the Cloud Function at ``url`` and log the body it returns.

    Args:
        url: HTTP endpoint of the Cloud Function.
        audience: Audience value used when requesting the ID token.

    Returns:
        None. The response text is only written to the log at INFO level.
    """
    logging.info(make_authorized_get_request(url, audience))


# Arguments handed to the DAG below as ``default_args``.
default_args = {
    "owner": "airflow",
    # Do not make a run wait on the outcome of the previous run.
    "depends_on_past": False,
    # No e-mail notifications on failure or retry.
    "email_on_failure": False,
    "email_on_retry": False,
    # Retry a failed task once, 30 minutes after the failure.
    "retries": 1,
    "retry_delay": timedelta(minutes=30),
    # NOTE(review): days_ago(1) is re-evaluated every time this file is
    # parsed; Airflow's documentation recommends a fixed start_date - confirm
    # whether a static date should be used here.
    "start_date": days_ago(1),
}
# Daily pipeline: two Cloud Function calls, a Dataform compilation and
# workflow invocation, one more Cloud Function call, then an end marker.
# Operators created inside the ``with`` block are attached to the DAG by the
# context manager, so none of them needs an explicit ``dag`` argument.
with DAG(
    dag_id='run_pim_data_pipeline',
    description='Execute PIM Data Pipeline',
    schedule_interval=timedelta(days=1),
    default_args=default_args,
) as dag:

    # Each Cloud Function is called at its own URL, which is also used as the
    # audience of the ID token sent with the request.
    xml_2_json_url = 'https://europe-west1-brendanlooker.cloudfunctions.net/xml-2-json'
    json_2_bq_url = 'https://europe-west1-brendanlooker.cloudfunctions.net/json-2-bq'
    bq_2_json_2_xml_url = 'https://europe-west1-brendanlooker.cloudfunctions.net/bq-2-json-2-xml'

    # Cloud Function call: xml-2-json
    invoke_xml_2_json_cf_task = PythonOperator(
        task_id='invoke_xml_2_json_cf',
        python_callable=invoke_cloud_function,
        op_kwargs={'url': xml_2_json_url, 'audience': xml_2_json_url},
    )

    # Cloud Function call: json-2-bq
    invoke_json_2_bq_cf_task = PythonOperator(
        task_id='invoke_json_2_bq_cf',
        python_callable=invoke_cloud_function,
        op_kwargs={'url': json_2_bq_url, 'audience': json_2_bq_url},
    )

    # Cloud Function call: bq-2-json-2-xml (scheduled after the Dataform run)
    invoke_bq_2_json_2_xml_cf_task = PythonOperator(
        task_id='invoke_bq_2_json_2_xml_cf',
        python_callable=invoke_cloud_function,
        op_kwargs={'url': bq_2_json_2_xml_url, 'audience': bq_2_json_2_xml_url},
    )

    # Compile the Dataform repository at the configured git commitish.
    create_dataform_compilation_result = DataformCreateCompilationResultOperator(
        task_id="create_df_compilation_result",
        project_id=PROJECT_ID,
        region=REGION,
        repository_id=REPOSITORY_ID,
        compilation_result={"git_commitish": GIT_COMMITISH},
    )

    # Start a Dataform workflow invocation. The templated value pulls the
    # 'name' entry from the XCom pushed by the compilation task above.
    create_dataform_workflow_invocation = DataformCreateWorkflowInvocationOperator(
        task_id='create_dataform_workflow_invocation',
        project_id=PROJECT_ID,
        region=REGION,
        repository_id=REPOSITORY_ID,
        workflow_invocation={
            "compilation_result": "{{ task_instance.xcom_pull('create_df_compilation_result')['name'] }}"
        },
    )

    # No-op task marking the end of the pipeline.
    finish_pipeline = DummyOperator(task_id='finish_pipeline')

    # Strictly linear ordering of the six tasks.
    (
        invoke_xml_2_json_cf_task
        >> invoke_json_2_bq_cf_task
        >> create_dataform_compilation_result
        >> create_dataform_workflow_invocation
        >> invoke_bq_2_json_2_xml_cf_task
        >> finish_pipeline
    )
