In [0]:
!pip install apache-airflow
!pip install azure-storage-blob==12.19.0

In [0]:


from airflow import DAG
import airflow
from datetime import datetime, timedelta
from airflow.operators.python import PythonOperator
from read_raw_data import read_raw_df
from transform_data import save_tabular_df
from group_data import group_data
import logging 
# Logger used for the progress messages emitted throughout this notebook.
logger = logging.getLogger("Airflow process")
# A Formatter only takes effect once it is attached to a handler, and the
# logger must allow INFO or the logger.info(...) calls below are dropped.
if not logger.handlers:  # re-running the cell must not stack duplicate handlers
    _log_handler = logging.StreamHandler()
    _log_handler.setFormatter(
        logging.Formatter("%(asctime)s:%(levelname)s: %(message)s")
    )
    logger.addHandler(_log_handler)
logger.setLevel(logging.INFO)
# This logger has its own handler; do not also forward records to the root
# logger, which would print every message twice when root is configured.
logger.propagate = False

logger.info('Defining args')
# Arguments applied to every task of the DAG: one retry, five minutes after a
# failure, and no dependency on the previous run of the same task.
default_args = {
    'owner': 'airflow',
    'start_date': datetime(2023, 1, 1),
    'depends_on_past': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

logger.info('Defining DAG')
# NOTE(review): `catchup` is not passed, so Airflow's configured default
# applies; with a 2023 start_date that may schedule a backfill of every missed
# daily run — confirm this is intended.
dag = DAG('dbrewery_dl_dag', default_args=default_args, schedule='@daily')


In [0]:

# The three stages of the pipeline, one PythonOperator each. The task_id
# strings are kept exactly as they were because they identify the tasks at
# runtime.
logger.info('Creating PythonOperator to read the raw data (Bronze Container)')
bc_task = PythonOperator(
    task_id='Bronze_container_construiction',
    python_callable=read_raw_df,
    dag=dag,
)

logger.info('Creating PythonOperator to transform and save the raw data (Silver Container)')
sc_task = PythonOperator(
    task_id='Silver_container_construiction',
    python_callable=save_tabular_df,
    dag=dag,
)

logger.info('Creating PythonOperator to group by brewery type (Gold Container)')
gc_task = PythonOperator(
    task_id='Gold_container_construiction',
    python_callable=group_data,
    dag=dag,
)

# Declare the execution order explicitly. Without these dependencies Airflow
# treats the three tasks as independent and may run them in any order or in
# parallel, so a later stage could start before the stage it builds on.
bc_task >> sc_task >> gc_task



In [0]:
%%bash
# Initialise the Airflow metadata database and create the admin account.
# The password is read from the AIRFLOW_ADMIN_PASSWORD environment variable
# instead of being written into the notebook; the cell aborts with a clear
# message when the variable is unset or empty.
# `set -e` stops the cell if `airflow db init` fails, so the user is never
# created against a database that was not initialised.
set -euo pipefail
airflow db init
airflow users create --username admin --firstname Gabriel --lastname Avezum \
    --password "${AIRFLOW_ADMIN_PASSWORD:?set AIRFLOW_ADMIN_PASSWORD before running this cell}" \
    --role Admin --email avezumgabriel@gmail.com

In [0]:
%%bash
# Point AIRFLOW_HOME at the directory this cell is executed in.
# NOTE(review): %%bash runs the cell in its own bash subprocess and nothing
# else runs in this cell, so the exported value is not visible to later
# cells — confirm whether it was meant to persist (e.g. via %env).
AIRFLOW_HOME="$PWD"
export AIRFLOW_HOME

In [0]:
%%bash
# Start the scheduler with AIRFLOW_HOME set to the ./airflow directory.
# `set -e` makes the cell stop if `cd airflow` fails; otherwise AIRFLOW_HOME
# would silently be set to the current directory and the scheduler would
# start against the wrong Airflow home.
set -e
cd airflow
export AIRFLOW_HOME="$(pwd)"
# NOTE(review): the scheduler is started in the foreground here, so this cell
# presumably keeps running until it is interrupted — confirm that is intended.
airflow scheduler

In [0]:
%sh
# Initialise the Airflow metadata database.
airflow db init


In [0]:
%sh
# Serve the Airflow web UI on port 8080.
# NOTE(review): started in the foreground, so this cell presumably does not
# return while the webserver is running — confirm.
airflow webserver --port 8080

In [0]:
%sh
# Start the Airflow scheduler, the process that triggers the DAG's runs.
airflow scheduler