# Airflow
- https://towardsdatascience.com/a-complete-introduction-to-apache-airflow-b7e238a33df
- https://aws.amazon.com/cn/blogs/china/orchestrate-big-data-workflows-with-apache-airflow-genie-and-amazon-emr-part-2/
- https://www.cnblogs.com/huanghanyu/articles/12923493.html
- https://leemeng.tw/a-story-about-airflow-and-data-engineering-using-how-to-use-python-to-catch-up-with-latest-comics-as-an-example.html


* conda update -n base conda
* conda create -n airflow pip setuptools python=3.6
* conda activate airflow
* pip install "apache-airflow[s3, postgres]"

https://github.com/hgrif/airflow-tutorial

https://towardsdatascience.com/a-complete-introduction-to-apache-airflow-b7e238a33df

https://medium.com/abn-amro-developer/data-pipeline-orchestration-on-steroids-apache-airflow-tutorial-part-1-87361905db6d

https://www.analyticsvidhya.com/blog/2020/11/getting-started-with-apache-airflow/

https://docs.aws.amazon.com/mwaa/latest/userguide/quick-start.html

https://marclamberti.com/blog/airflow-dag-creating-your-first-dag-in-5-minutes/

https://airflow.readthedocs.io/en/1.10.14/_modules/airflow/example_dags/tutorial.html

https://blog.coursesity.com/best-apache-airflow-tutorials/

https://www.astronomer.io/guides/

https://www.applydatascience.com/airflow/writing-your-first-pipeline/

#### install
- https://airflow-tutorial.readthedocs.io/en/latest/setup.html
- https://medium.com/@ryanroline/installing-apache-airflow-on-windows-10-5247aa1249ef

#### activate
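- conda activate airflow-env  (use `conda activate airflow` if the environment was created with the `conda create -n airflow ...` command above)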

#### deactivate
- conda deactivate

#### install mysql
- conda install -c anaconda mysql

In [1]:
from datetime import datetime, timedelta
from textwrap import dedent

# The DAG object; we'll need this to instantiate a DAG
from airflow import DAG

# Operators; we need this to operate!
from airflow.operators.bash import BashOperator
# Baseline keyword arguments shared by the operators of this DAG.
# A task can still supply its own value for any of these keys when it is
# instantiated, which takes the place of the value listed here.
default_args = {
    "owner": "airflow",
    "depends_on_past": False,
    # Notification address, with both failure and retry e-mails switched off.
    "email": ["airflow@example.com"],
    "email_on_failure": False,
    "email_on_retry": False,
    # One retry, attempted five minutes after the failed try.
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
    # Further optional keys, kept here as disabled examples:
    #   "queue": "bash_queue",
    #   "pool": "backfill",
    #   "priority_weight": 10,
    #   "end_date": datetime(2016, 1, 1),
    #   "wait_for_downstream": False,
    #   "dag": dag,
    #   "sla": timedelta(hours=2),
    #   "execution_timeout": timedelta(seconds=300),
    #   "on_failure_callback": some_function,
    #   "on_success_callback": some_other_function,
    #   "on_retry_callback": another_function,
    #   "sla_miss_callback": yet_another_function,
    #   "trigger_rule": "all_success",
}

# Define the "tutorial" DAG: a one-day schedule interval starting on
# 2021-01-01, catchup disabled, tagged "example". The three BashOperator
# tasks below are created inside this ``with`` body.
with DAG(
    "tutorial",
    default_args=default_args,
    description="A simple tutorial DAG",
    schedule_interval=timedelta(days=1),
    start_date=datetime(2021, 1, 1),
    catchup=False,
    tags=["example"],
) as dag:

    # Jinja-templated shell script used by the "templated" task: a loop of
    # five iterations, each echoing ``ds``, the result of
    # ``macros.ds_add(ds, 7)`` and the user-supplied ``params.my_param``.
    templated_command = dedent(
        """
    {% for i in range(5) %}
        echo "{{ ds }}"
        echo "{{ macros.ds_add(ds, 7)}}"
        echo "{{ params.my_param }}"
    {% endfor %}
    """
    )

    # --- Tasks: each one is an operator instance --------------------------

    # Runs the shell ``date`` command.
    t1 = BashOperator(
        task_id="print_date",
        bash_command="date",
    )

    # Sleeps for five seconds; passes its own ``retries=3`` instead of the
    # ``retries`` value from ``default_args``.
    t2 = BashOperator(
        task_id="sleep",
        depends_on_past=False,
        bash_command="sleep 5",
        retries=3,
    )

    # Runs the templated script, providing the value for ``params.my_param``.
    t3 = BashOperator(
        task_id="templated",
        depends_on_past=False,
        bash_command=templated_command,
        params={"my_param": "Parameter I passed in"},
    )

    # --- Documentation ----------------------------------------------------

    # Markdown documentation attached to the first task.
    t1.doc_md = dedent(
        """\
    #### Task Documentation
    You can document your task using the attributes `doc_md` (markdown),
    `doc` (plain text), `doc_rst`, `doc_json`, `doc_yaml` which gets
    rendered in the UI's Task Instance Details page.
    ![img](http://montcs.bloomu.edu/~bobmon/Semesters/2012-01/491/import%20soul.png)

    """
    )

    # Two ways of documenting the DAG itself are shown back to back:
    #   1. reuse the module docstring, if the file has one;
    #   2. assign the text directly.
    # The second assignment replaces the first, so the literal below is the
    # value that remains on ``dag.doc_md``.
    dag.doc_md = __doc__
    dag.doc_md = """
    This is a documentation placed anywhere
    """

    # --- Dependencies -----------------------------------------------------

    # ``print_date`` is upstream of both ``sleep`` and ``templated``.
    t1 >> [t2, t3]

