# Setup Airflow

Install airflow
```bash
pip install "apache-airflow==2.8.2"
```

The default Airflow config file is `~/airflow/airflow.cfg`. Change the following settings in it:
- Set `dags_folder` to the directory where you want to save the DAGs. Also update the `dags_folder` variable in the cell below to match.
- Set `load_examples = False`.

In a terminal, run the following (**remember to change the parameters for `airflow users create`**):
```bash
airflow db migrate
airflow users create --username jan --password admin123 --firstname jan --lastname steeb --role Admin --email jwsteeb@gmail.com
```

To run the DAGs, open two separate terminals and run one of the following commands in each:
```bash
airflow webserver -p 8080
airflow scheduler
```
The UI can now be accessed at `http://localhost:8080/`.

The command `airflow dags list` will list all the available DAGs.

In [8]:
# Directory the generated DAG file is written to (see the
# generate_airflow_workflow call below). Keep it in sync with the dags_folder
# setting in ~/airflow/airflow.cfg.
dags_folder = "/Users/jsteeb/Dropbox/graphviper/docs/airflow_dags"

In [9]:
import pandas as pd
from graphviper.utils.data import download
from xradio.vis.read_processing_set import read_processing_set

# The partition names in the summary table are long; widen the column limit
# so they are not truncated when the table is displayed.
pd.options.display.max_colwidth = 100

# Name of the example dataset. Defined once and reused below so the download
# and the read can never point at different stores.
ps_name = "Antennae_North.cal.lsrk.split.vis.zarr"

download(file=ps_name)

# Selection passed to read_processing_set: only on-source target scans, and
# no restriction on fields.
intents = ["OBSERVE_TARGET#ON_SOURCE"]
fields = None
ps = read_processing_set(
    ps_name=ps_name,
    intents=intents,
    fields=fields,
)
display(ps.summary())

[2024-03-07 17:34:49,750]     INFO  graphviper:  File exists: Antennae_North.cal.lsrk.split.vis.zarr


Unnamed: 0,name,ddi,intent,field_id,field_name,start_frequency,end_frequency
0,Antennae_North.cal.lsrk.split_ddi_0_intent_OBSERVE_TARGET#ON_SOURCE_field_id_1,0,OBSERVE_TARGET#ON_SOURCE,1,NGC4038 - Antennae North,343928100000.0,344006700000.0
1,Antennae_North.cal.lsrk.split_ddi_0_intent_OBSERVE_TARGET#ON_SOURCE_field_id_0,0,OBSERVE_TARGET#ON_SOURCE,0,NGC4038 - Antennae North,343928100000.0,344006700000.0
2,Antennae_North.cal.lsrk.split_ddi_0_intent_OBSERVE_TARGET#ON_SOURCE_field_id_2,0,OBSERVE_TARGET#ON_SOURCE,2,NGC4038 - Antennae North,343928100000.0,344006700000.0


In [10]:
from IPython.display import HTML, display
from graphviper.graph_tools.coordinate_utils import (
    interpolate_data_coords_onto_parallel_coords,
    make_frequency_coord,
    make_parallel_coord,
)
from graphviper.utils.display import dict_to_html

# Number of chunks the frequency axis is split into (used by both variants).
n_chunks = 3

# Variant 1: build the parallel coordinate from the frequency axis of one of
# the partitions in the processing set.
ms_xds = ps[
    "Antennae_North.cal.lsrk.split_ddi_0_intent_OBSERVE_TARGET#ON_SOURCE_field_id_1"
]
parallel_coords = {
    "frequency": make_parallel_coord(coord=ms_xds.frequency, n_chunks=n_chunks),
}
# To inspect the result:
# display(HTML(dict_to_html(parallel_coords["frequency"])))

# Variant 2: build the frequency coordinate explicitly from a start value, a
# channel width and a channel count. This replaces the entry from variant 1,
# so it is the one used for the mapping below.
coord = make_frequency_coord(
    freq_start=343928096685.9587,
    freq_delta=11231488.981445312,
    n_channels=8,
    velocity_frame="lsrk",
)
parallel_coords["frequency"] = make_parallel_coord(coord=coord, n_chunks=n_chunks)
# To inspect the result:
# display(HTML(dict_to_html(parallel_coords["frequency"])))

# Map the data coordinates of the processing set onto the parallel coordinates.
node_task_data_mapping = interpolate_data_coords_onto_parallel_coords(
    parallel_coords,
    ps,
)

from graphviper.graph_tools import map, reduce
from graphviper.graph_tools.generate_dask_workflow import generate_dask_workflow
import dask
from graphviper.utils.display import dict_to_html
from IPython.display import display, HTML


def my_func(input_params):
    """Example map task: log a banner line and return the test value.

    Parameters
    ----------
    input_params : dict
        Parameters for this task; must contain the key "test_input".

    Returns
    -------
    The value stored under input_params["test_input"].
    """
    # Imported inside the function so the task body is self-contained.
    # NOTE(review): presumably needed because the task runs outside this
    # notebook's namespace when executed by Airflow - confirm.
    import logging

    banner = "*" * 30
    logging.info(banner)

    result = input_params["test_input"]
    return result


# Parameters passed to the map stage; my_func reads "test_input" from them.
input_params = {"test_input": 42}

# Build the map stage of the graph with my_func as the node task.
viper_graph = map(
    node_task=my_func,
    node_task_data_mapping=node_task_data_mapping,
    input_data=ps,
    input_params=input_params,
)

def my_sum(graph_inputs, input_params):
    """Example reduce task: print the inputs and return their scaled sum.

    Parameters
    ----------
    graph_inputs : array-like
        Numeric values handed to the reduce node.
        NOTE(review): presumably the collected results of the map tasks -
        confirm against graphviper's ``reduce``.
    input_params : dict
        Must contain the key "test_input", the divisor applied to every
        element before summing.

    Returns
    -------
    The sum of ``graph_inputs / input_params["test_input"]`` as a NumPy
    scalar.
    """
    # Imported locally (as my_func does with logging): no module-level
    # ``np`` is visible in this notebook, so a global lookup would raise
    # NameError.
    import numpy as np

    print(graph_inputs)
    # Convert first: a plain Python list cannot be divided by a number.
    scaled = np.asarray(graph_inputs) / input_params["test_input"]
    return np.sum(scaled)


# os is needed for os.path.join below and is not imported anywhere else in
# the visible notebook.
import os

from graphviper.graph_tools import generate_airflow_workflow

# Parameters for the reduce stage; my_sum divides by "test_input". A new dict
# is bound here so the dict already given to the map stage is not modified.
input_params = {"test_input": 5}
viper_graph_reduce = reduce(
    viper_graph, my_sum, input_params, mode="single_node"
)  # mode "tree","single_node"

print(viper_graph_reduce)

# Write the workflow as an Airflow DAG file into the configured DAGs folder.
# NOTE(review): viper_graph (not viper_graph_reduce) is exported although the
# DAG is named "map_reduce_3". This is only equivalent if reduce() updates the
# graph in place - confirm.
generate_airflow_workflow(
    viper_graph,
    filename=os.path.join(dags_folder, "map_reduce_3.py"),
    dag_name="map_reduce_3",
)



{'map': {'node_task': <function my_func at 0x122b3b6a0>, 'input_params': [{'test_input': 42, 'chunk_indices': (2,), 'parallel_dims': ['frequency'], 'data_selection': {'Antennae_North.cal.lsrk.split_ddi_0_intent_OBSERVE_TARGET#ON_SOURCE_field_id_1': {'frequency': slice(6, 8, None)}, 'Antennae_North.cal.lsrk.split_ddi_0_intent_OBSERVE_TARGET#ON_SOURCE_field_id_0': {'frequency': slice(6, 8, None)}, 'Antennae_North.cal.lsrk.split_ddi_0_intent_OBSERVE_TARGET#ON_SOURCE_field_id_2': {'frequency': slice(6, 8, None)}}, 'task_coords': {'frequency': {'data': array([3.43995486e+11, 3.44006717e+11]), 'dims': 'frequency', 'attrs': {'units': 'Hz', 'type': 'spectral_coord', 'velocity_frame': 'lsrk'}}}, 'task_id': 2, 'input_data': None, 'date_time': None}, {'test_input': 42, 'chunk_indices': (2,), 'parallel_dims': ['frequency'], 'data_selection': {'Antennae_North.cal.lsrk.split_ddi_0_intent_OBSERVE_TARGET#ON_SOURCE_field_id_1': {'frequency': slice(6, 8, None)}, 'Antennae_North.cal.lsrk.split_ddi_0_inte