# Scheduling pipeline execution with Airflow

In [None]:
# Scheduled DAG for the Titanic data
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator
from datetime import datetime, timedelta
import pandas as pd

In [None]:
# Default arguments handed to the DAG via its `default_args` parameter.
default_args = {
    'owner': 'Evangelista, Eduardo',
    # Spelled `depends_on_past`, the key Airflow documents for this setting.
    'depends_on_past': False,
    # The month is written as 1 (not 01): integer literals with a leading
    # zero are a SyntaxError in Python 3.
    'start_date': datetime(2020, 1, 25, 15),
    # NOTE(review): the same address appears twice -- confirm whether a
    # second, different recipient was intended.
    'email': ['eoevangelista@gmail.com', 'eoevangelista@gmail.com'],
    'email_on_failure': False,
    'email_on_retry': False,
    "retries": 1,
    "retry_delay": timedelta(minutes=1),
}

In [None]:
# Define the DAG (the flow) that the tasks below attach to.
dag = DAG(
    dag_id="trein-02",
    default_args=default_args,
    description="Extract Titanic data from the internet and calculate the average age",
    schedule_interval="*/2 * * * *",  # cron expression: every second minute
)

# Download the Titanic CSV to ~/train.csv, where the mean-age task reads it.
# `--fail` makes curl exit non-zero on an HTTP error response, so the task
# fails instead of saving the server's error page as train.csv.
get_data = BashOperator(
    task_id='get-data',
    bash_command=(
        'curl --fail '
        'https://raw.githubusercontent.com/A3Data/hermione/master/hermione/file_text/train.csv '
        '-o ~/train.csv'
    ),
    dag=dag
)

In [None]:
def calculate_mean_age(path='~/train.csv'):
    """Return the mean of the ``Age`` column of a CSV file.

    Args:
        path: Location of the CSV file to read. Defaults to ``~/train.csv``,
            the file written by the ``get-data`` task, so calling the
            function without arguments behaves as before.

    Returns:
        The mean age as a plain ``float``. Missing ages are skipped, which is
        the pandas default for ``Series.mean``.
    """
    ages = pd.read_csv(path)['Age']
    # Convert the NumPy scalar to a built-in float; this return value is what
    # the downstream task pulls through XCom.
    return float(ages.mean())

def print_age(**context):
    """Print the average age produced by the ``calcula-idade-media`` task.

    Reads ``context['task_instance']`` and calls its ``xcom_pull`` with
    ``task_ids='calcula-idade-media'``; the pulled value is interpolated
    into the message written to standard output.
    """
    task_instance = context['task_instance']
    value = task_instance.xcom_pull(task_ids='calcula-idade-media')
    message = f"The average age on the Titanic was {value} years."
    print(message)

# Task that runs calculate_mean_age; print_age pulls this task's result
# by its task_id, 'calcula-idade-media'.
task_idade_media = PythonOperator(
    dag=dag,
    task_id='calcula-idade-media',
    python_callable=calculate_mean_age,
)

# Task that runs print_age. provide_context=True is set because print_age
# reads the task instance from its keyword arguments.
task_print_idade = PythonOperator(
    dag=dag,
    task_id='mostra-idade',
    provide_context=True,
    python_callable=print_age,
)

In [None]:
# Execution order: download the CSV, compute the mean age, then print it.
get_data >> task_idade_media
task_idade_media >> task_print_idade

# Screenshots from Airflow application pipeline execution

![Page1](Page1.png)

![Page2](Page2.png)

![Page3](Page3.png)

![Page4](Page4.png)