# Setup Airflow

Install Airflow:
```bash
pip install "apache-airflow==2.8.2"
```

The default Airflow config file is `~/airflow/airflow.cfg`. Change the following settings in it:
- `dags_folder`: set it to the directory where you want to save the DAGs. Also update the `dags_folder` variable in the cell below to match.
- `load_examples = False`

In a terminal, run the following (**remember to change the parameters for `airflow users create`**):
```bash
airflow db migrate
airflow users create --username jan --password admin123 --firstname jan --lastname steeb --role Admin --email jwsteeb@gmail.com
```

To run the DAGs, open two separate terminals and run one of the following commands in each:
```bash
airflow webserver -p 8080
airflow scheduler
```
The UI can now be accessed at `http://localhost:8080/`.

The command `airflow dags list` will list all the available DAGs.

In [1]:
import os

# Folder that the generated Airflow DAG files are written to. Keep it in sync
# with the `dags_folder` setting in airflow.cfg. A leading "~" is expanded to
# the current user's home directory.
#dags_folder = "/Users/jsteeb/Dropbox/graphviper/docs/airflow_dags"
dags_folder = os.path.expanduser("~")
print(dags_folder)

/Users/jsteeb


In [2]:
from toolviper.utils.data import download
from xradio.measurement_set.convert_msv2_to_processing_set import convert_msv2_to_processing_set

# Input MSv2 dataset and the processing set (zarr store) written from it.
infile = "Antennae_North.cal.lsrk.split.ms"
outfile = "Antennae_North.cal.lsrk.split.ps.zarr"

# Fetch the example measurement set into the working directory.
download(file=infile)

# The chunksize on disk. Chunksize can be specified for any of the following dimensions :
# time, baseline_id (interferometer) / antenna_id (single dish), frequency, and polarization.
chunks_on_disk = {"frequency": 3}

# Convert the measurement set; overwrite=True replaces an existing output store.
convert_msv2_to_processing_set(
    in_file=infile,
    out_file=outfile,
    parallel_mode="none",
    overwrite=True,
    main_chunksize=chunks_on_disk,
)

[[38;2;128;05;128m2025-10-07 10:42:23,368[0m] [38;2;50;50;205m    INFO[0m[38;2;112;128;144m    viperlog: [0m Module path: [38;2;50;50;205m/Users/jsteeb/Dropbox/toolviper/src/toolviper[0m 
[[38;2;128;05;128m2025-10-07 10:42:23,370[0m] [38;2;50;50;205m    INFO[0m[38;2;112;128;144m    viperlog: [0m Downloading from [cloudflare] .... 


[[38;2;128;05;128m2025-10-07 10:42:23,373[0m] [38;2;50;50;205m    INFO[0m[38;2;112;128;144m    viperlog: [0m File exists: Antennae_North.cal.lsrk.split.ms 


Output()

[[38;2;128;05;128m2025-10-07 10:42:24,419[0m] [38;2;50;50;205m    INFO[0m[38;2;112;128;144m    viperlog: [0m Updated partition scheme used: ['DATA_DESC_ID', 'OBS_MODE', 'OBSERVATION_ID'] 
[[38;2;128;05;128m2025-10-07 10:42:24,421[0m] [38;2;50;50;205m    INFO[0m[38;2;112;128;144m    viperlog: [0m Number of partitions: 4 
[[38;2;128;05;128m2025-10-07 10:42:24,421[0m] [38;2;50;50;205m    INFO[0m[38;2;112;128;144m    viperlog: [0m OBSERVATION_ID [0], DDI [0], STATE [23, 24, 25, 30, 31, 32, 33, 34, 37], FIELD [0, 1, 2], SCAN [9, 17, 21, 25], EPHEMERIS [None] 
[[38;2;128;05;128m2025-10-07 10:42:24,613[0m] [38;2;50;50;205m    INFO[0m[38;2;112;128;144m    viperlog: [0m OBSERVATION_ID [1], DDI [0], STATE [23, 24, 25, 30, 31, 32, 33, 34, 37], FIELD [0, 1, 2], SCAN [26, 34, 38, 42], EPHEMERIS [None] 
[[38;2;128;05;128m2025-10-07 10:42:24,761[0m] [38;2;50;50;205m    INFO[0m[38;2;112;128;144m    viperlog: [0m OBSERVATION_ID [2], DDI [0], STATE [32, 33, 34], FIELD [0, 1

In [3]:
from toolviper.utils.data import download

import pandas as pd
from xradio.measurement_set import open_processing_set

# Show up to 100 characters per column so the long list-valued entries of the
# summary table are not truncated at the pandas default width.
pd.set_option("display.max_colwidth", 100)

ps_name = "Antennae_North.cal.lsrk.split.ps.zarr"
intents = ["OBSERVE_TARGET#ON_SOURCE"]

# Open the processing set, passing the scan intents to select on, and render
# its summary table as the cell output.
ps = open_processing_set(ps_store=ps_name, scan_intents=intents)
display(ps.xr_ps.summary())

Unnamed: 0,name,scan_intents,shape,execution_block_UID,polarization,scan_name,spw_name,spw_intents,field_name,source_name,line_name,field_coords,session_reference_UID,scheduling_block_UID,project_UID,start_frequency,end_frequency
0,Antennae_North.cal.lsrk.split_0,[OBSERVE_TARGET#ON_SOURCE],"(50, 45, 8, 2)",uid://A002/X1ff7b0/Xb,"[XX, YY]","[17, 21, 25, 9]",spw_0,UNSPECIFIED,"[NGC4038 - Antennae North_0, NGC4038 - Antennae North_1, NGC4038 - Antennae North_2]",[NGC4038 - Antennae North_0],[],Multi-Phase-Center,---,uid://A002/X1fd4e7/X64d,T.B.D.,343928100000.0,344006700000.0
1,Antennae_North.cal.lsrk.split_1,[OBSERVE_TARGET#ON_SOURCE],"(50, 55, 8, 2)",uid://A002/X207fe4/X3a,"[XX, YY]","[26, 34, 38, 42]",spw_0,UNSPECIFIED,"[NGC4038 - Antennae North_0, NGC4038 - Antennae North_1, NGC4038 - Antennae North_2]",[NGC4038 - Antennae North_0],[],Multi-Phase-Center,---,uid://A002/X1fd4e7/X64d,T.B.D.,343928100000.0,344006700000.0
2,Antennae_North.cal.lsrk.split_2,[OBSERVE_TARGET#ON_SOURCE],"(15, 55, 8, 2)",uid://A002/X207fe4/X3b9,"[XX, YY]",[43],spw_0,UNSPECIFIED,"[NGC4038 - Antennae North_0, NGC4038 - Antennae North_1, NGC4038 - Antennae North_2]",[NGC4038 - Antennae North_0],[],Multi-Phase-Center,---,uid://A002/X1fd4e7/X64d,T.B.D.,343928100000.0,344006700000.0
3,Antennae_North.cal.lsrk.split_3,"[OBSERVE_TARGET#ON_SOURCE, CALIBRATE_WVR#ON_SOURCE]","(50, 77, 8, 2)",uid://A002/X2181fb/X49,"[XX, YY]","[48, 56, 60, 64]",spw_0,UNSPECIFIED,"[NGC4038 - Antennae North_0, NGC4038 - Antennae North_1, NGC4038 - Antennae North_2]",[NGC4038 - Antennae North_0],[],Multi-Phase-Center,---,uid://A002/X1fd4e7/X64d,T.B.D.,343928100000.0,344006700000.0


In [4]:
%load_ext autoreload
%autoreload 2

from graphviper.graph_tools.coordinate_utils import make_parallel_coord
from toolviper.utils.display import dict_to_html
from IPython.display import HTML, display
import os

ms_xds = ps['Antennae_North.cal.lsrk.split_0']

parallel_coords = {}
n_chunks = 3
parallel_coords["frequency"] = make_parallel_coord(
    coord=ms_xds.frequency, n_chunks=n_chunks
)
#display(HTML(dict_to_html(parallel_coords["frequency"])))

from graphviper.graph_tools.coordinate_utils import make_frequency_coord

n_chunks = 3

coord = make_frequency_coord(
    freq_start=343928096685.9587,
    freq_delta=11231488.981445312,
    n_channels=8,
    velocity_frame="lsrk",
)
parallel_coords["frequency"] = make_parallel_coord(
    coord=coord, n_chunks=n_chunks
)
#display(HTML(dict_to_html(parallel_coords["frequency"])))

from graphviper.graph_tools.coordinate_utils import (
    interpolate_data_coords_onto_parallel_coords,
)

node_task_data_mapping = interpolate_data_coords_onto_parallel_coords(
    parallel_coords, ps
)

from graphviper.graph_tools import map, reduce
from graphviper.graph_tools.generate_dask_workflow import generate_dask_workflow
import dask
from toolviper.utils.display import dict_to_html
from IPython.display import display, HTML


def my_func(input_params):
    """Example map task: log a separator line and return the test value.

    Parameters
    ----------
    input_params : dict
        Parameters passed to the task. Only the ``"test_input"`` entry is read.

    Returns
    -------
    The value stored under ``input_params["test_input"]``.
    """
    # Imported locally so the function carries its own dependency.
    import logging

    # For debugging, the full parameter dict can be rendered with:
    #display(HTML(dict_to_html(input_params)))
    separator = "*" * 30
    logging.info(separator)

    test_value = input_params["test_input"]
    return test_value


# Parameters passed to the map node task (`my_func` reads "test_input").
input_params = {"test_input": 42}

# Build the map stage of the graph: `my_func` is the node task and the data
# selection comes from `node_task_data_mapping`.
viper_graph = map(
    input_data=ps,
    node_task_data_mapping=node_task_data_mapping,
    node_task=my_func,
    input_params=input_params,
)

def my_sum(graph_inputs, input_params):
    """Example reduce task: scale the inputs and add them up.

    Parameters
    ----------
    graph_inputs : sequence of numbers
        Values to combine; converted to a NumPy array before use.
    input_params : dict
        Parameters of the task. ``input_params["test_input"]`` is the divisor
        applied to every element before summing.

    Returns
    -------
    The NumPy sum of ``graph_inputs / input_params["test_input"]``.
    """
    import numpy as np
    import graphviper.utils.logger as logger

    divisor = input_params["test_input"]
    scaled_inputs = np.array(graph_inputs) / divisor
    result = np.sum(scaled_inputs)

    logger.info('The result is: '+str(result))
    return result


from graphviper.graph_tools import generate_airflow_workflow

# Parameters for the reduce task (`my_sum` divides by "test_input"). This is a
# new dict, so the dict that was passed to `map` is not modified.
input_params = {"test_input": 5}

# Add the reduce stage on top of the map graph.
viper_graph_reduce = reduce(
    viper_graph, my_sum, input_params, mode="single_node"
)  # mode "tree","single_node"

print(viper_graph_reduce)

# Export the map + reduce graph as an Airflow DAG file in `dags_folder`.
# The graph returned by `reduce` is passed explicitly, so the export does not
# depend on `reduce` having modified `viper_graph` in place.
# The DAG name is defined once and reused for the file name.
dag_name = 'map_reduce_4'
generate_airflow_workflow(
    viper_graph_reduce,
    filename=os.path.join(dags_folder, dag_name + '.py'),
    dag_name=dag_name,
)



{'map': {'node_task': <function my_func at 0x1683c0040>, 'input_params': [{'test_input': 42, 'chunk_indices': (np.int64(0),), 'parallel_dims': ['frequency'], 'data_selection': {'Antennae_North.cal.lsrk.split_0': {'frequency': slice(np.int64(0), np.int64(3), None)}, 'Antennae_North.cal.lsrk.split_1': {'frequency': slice(np.int64(0), np.int64(3), None)}, 'Antennae_North.cal.lsrk.split_2': {'frequency': slice(np.int64(0), np.int64(3), None)}, 'Antennae_North.cal.lsrk.split_3': {'frequency': slice(np.int64(0), np.int64(3), None)}}, 'task_coords': {'frequency': {'data': array([3.43928097e+11, 3.43939328e+11, 3.43950560e+11]), 'dims': 'frequency', 'attrs': {'units': 'Hz', 'type': 'spectral_coord', 'velocity_frame': 'lsrk'}}}, 'task_id': 0, 'input_data': None, 'date_time': None}, {'test_input': 42, 'chunk_indices': (np.int64(1),), 'parallel_dims': ['frequency'], 'data_selection': {'Antennae_North.cal.lsrk.split_0': {'frequency': slice(np.int64(3), np.int64(6), None)}, 'Antennae_North.cal.lsrk