**SageMaker Repo:  Workflow/Airflow**:  
https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/workflow/airflow.py

**Airflow Repo:  SageMaker Operators**: 
https://github.com/apache/airflow/tree/master/airflow/providers/amazon/aws

**Airflow Workshop**:
https://www.sagemakerworkshop.com/airflow

**Blog Post for Airflow Workshop**:
https://aws.amazon.com/blogs/machine-learning/build-end-to-end-machine-learning-workflows-with-amazon-sagemaker-and-apache-airflow/

# Define the Directed Acyclic Graph (DAG) of Execution

In [None]:
import airflow
from airflow import DAG
from airflow.utils.dates import days_ago

# Arguments applied to every task attached to the DAG below.
default_args = {
    'owner': 'airflow',
    # Airflow refuses to attach a task to a DAG when neither the task nor the
    # DAG has a start_date, so one is supplied here for all tasks.
    'start_date': days_ago(2),
    'provide_context': True
}

# The pipeline DAG; the '@once' schedule requests a single run.
dag = DAG('bert_reviews', 
          default_args=default_args,
          schedule_interval='@once')

In [None]:
# DummyOperator is not imported by any earlier cell in view, so bring it into
# scope here (Airflow 1.10-style path, matching the contrib imports below).
from airflow.operators.dummy_operator import DummyOperator

# No-op entry task; the processing task is attached downstream of it.
init = DummyOperator(
    task_id='start',
    dag=dag
)

# SageMaker Processing Job Operator

In [None]:
# NOTE(review): the SageMaker processing operator may only ship under
# airflow.providers.amazon.aws (the repo linked at the top of this notebook),
# not under airflow.contrib -- confirm this import path for the Airflow
# version in use.
from airflow.contrib.operators.sagemaker_processing_operator import SageMakerProcessingOperator
from sagemaker.workflow.airflow import processing_config

# Build the config that the processing operator below receives.
# NOTE(review): `estimator` is first assigned in the training section further
# down, so running the cells top to bottom would hit a NameError here unless
# it is defined elsewhere -- confirm.
# NOTE(review): processing_config presumably expects a `processor` argument
# (a SageMaker Processor object) and ProcessingInput/ProcessingOutput lists
# rather than an estimator and bare S3 URIs -- verify against the SDK docs.
process_config = processing_config(estimator=estimator,
                                   inputs=input_data_s3_uri,
                                   outputs=output_data_s3_uri)

# Airflow task 'process' on the pipeline DAG, driven by process_config.
process_op = SageMakerProcessingOperator(
    task_id='process',
    config=process_config,
    wait_for_completion=True,
    dag=dag)

In [None]:
# Register the start task as the upstream dependency of the processing task.
process_op.set_upstream(init)

# SageMaker Training Job Operator

In [None]:
from sagemaker.tensorflow import TensorFlow

# TensorFlow estimator wrapping the training script src/tf_bert_reviews.py.
# Instance sizing, hyperparameter values, debugger rules and hook config are
# all read from variables that must already exist in the notebook session.
estimator = TensorFlow(
    entry_point='tf_bert_reviews.py',
    source_dir='src',
    role=role,
    instance_count=train_instance_count,
    instance_type=train_instance_type,
    volume_size=train_volume_size,
    use_spot_instances=True,
    max_wait=7200,  # seconds; wait limit that applies when spot instances are used
    checkpoint_s3_uri=checkpoint_s3_uri,
    py_version='py3',
    framework_version='2.1.0',
    # Forwarded to the training script as its hyperparameters.
    hyperparameters=dict(
        epochs=epochs,
        learning_rate=learning_rate,
        epsilon=epsilon,
        train_batch_size=train_batch_size,
        validation_batch_size=validation_batch_size,
        test_batch_size=test_batch_size,
        train_steps_per_epoch=train_steps_per_epoch,
        validation_steps=validation_steps,
        test_steps=test_steps,
        use_xla=use_xla,
        use_amp=use_amp,
        max_seq_length=max_seq_length,
        freeze_bert_layer=freeze_bert_layer,
        enable_sagemaker_debugger=enable_sagemaker_debugger,
        enable_checkpointing=enable_checkpointing,
        enable_tensorboard=enable_tensorboard,
        run_validation=run_validation,
        run_test=run_test,
        run_sample_predictions=run_sample_predictions,
    ),
    input_mode=input_mode,
    metric_definitions=metrics_definitions,
    rules=rules,
    debugger_hook_config=hook_config,
    max_run=7200,  # seconds
)

In [None]:
from airflow.contrib.operators.sagemaker_training_operator import SageMakerTrainingOperator
from sagemaker.workflow.airflow import training_config

# Turn the estimator and its input location into the config handed to the
# training operator.
train_config = training_config(
    estimator=estimator,
    inputs=input_data_s3_uri,
)

# Airflow task 'train' on the pipeline DAG.
train_op = SageMakerTrainingOperator(
    dag=dag,
    task_id='train',
    config=train_config,
    wait_for_completion=True,
)

In [None]:
# Register the processing task as the upstream dependency of the training task.
train_op.set_upstream(process_op)

# SageMaker Model Operator

In [None]:
from airflow.contrib.operators.sagemaker_model_operator import SageMakerModelOperator
from sagemaker.workflow.airflow import model_config, model_config_from_estimator

# `model_config` is a function; the operator's `config` must be the dict such a
# helper returns. Derive that dict from the estimator used by the 'train' task.
# NOTE(review): instance_type is used by the SDK to choose the serving image;
# the training instance type is reused here because no inference instance
# type is defined in view -- substitute a dedicated one if it exists.
model_conf = model_config_from_estimator(
    estimator=estimator,
    task_id='train',
    task_type='training',
    instance_type=train_instance_type,
)

# Airflow task 'model' on the pipeline DAG.
# wait_for_completion is intentionally not passed: the model operator does not
# declare that argument, so it would fall through to the base operator as an
# unknown keyword.
model_op = SageMakerModelOperator(
    task_id='model',
    config=model_conf,
    dag=dag)

In [None]:
# Register the training task as the upstream dependency of the model task.
model_op.set_upstream(train_op)

# SageMaker Endpoint Operator

In [None]:
from airflow.contrib.operators.sagemaker_endpoint_operator import SageMakerEndpointOperator
# No trailing comma after the imported name: an unparenthesized
# `from ... import name,` is a SyntaxError.
from sagemaker.workflow.airflow import endpoint_config  # alternative helper: deploy_config_from_estimator

# Airflow task 'deploy' on the pipeline DAG.
# NOTE(review): `endpoint_config` here is the imported function, not a config
# dict. Presumably `config` should be the result of calling endpoint_config(...)
# or deploy_config_from_estimator(...) with the deployment instance count and
# type -- those values are not defined in view, so confirm and supply them.
deploy_op = SageMakerEndpointOperator(
    task_id='deploy',
    config=endpoint_config,
    wait_for_completion=True,
    dag=dag)

In [None]:
# Register the model task as the upstream dependency of the deploy task.
deploy_op.set_upstream(model_op)

# Set Up DAG Dependencies

In [None]:
# Wire the pipeline in order: start -> process -> train -> model -> deploy.
# The processing task variable is `process_op`; `processing_op` is never
# defined and would raise NameError.
# These are the same edges as the set_upstream calls made after each operator
# is created in the cells above.
init.set_downstream(process_op)
process_op.set_downstream(train_op)
train_op.set_downstream(model_op)
model_op.set_downstream(deploy_op)