# Processing Set Visibility Tutorial

## Import xradio

In [1]:
import os, pprint
import subprocess
import sys
from importlib.metadata import version

try:
    # Install with the interpreter that runs this kernel, so the package lands in
    # the environment the notebook imports from (a bare "pip" found on PATH may
    # belong to a different Python installation).
    # NOTE: the outputs saved in this notebook were produced with xradio 0.0.40;
    # pin that version here if a newer release changes the API used below.
    pip_result = subprocess.run(
        [sys.executable, "-m", "pip", "install", "--upgrade", "xradio"],
        check=False,
    )
    if pip_result.returncode != 0:
        # Best effort: an already-installed xradio may still be importable.
        print(f"pip install exited with status {pip_result.returncode}")

    import xradio

    print("Using xradio version", version("xradio"))

except ImportError as exc:
    print(f"Could not import xradio: {exc}")

Using xradio version 0.0.40


## Download example MSv2

## Preparation

In [2]:
import toolviper

# Fetch the example MSv2 that the rest of this tutorial converts and inspects.
toolviper.utils.data.download(file="Antennae_North.cal.lsrk.split.ms")

[2024-09-11 13:38:53,594]     INFO   toolviper:  Updating file metadata information ...
 

 

Antennae_North.cal.lsrk.split.ms.zip:   0%|          | 0.00/1.49M [00:00<?, ?iB/s]

## Processing Set

## Convert MSv2 => Processing Set (PS)

In [3]:
from xradio.vis.convert_msv2_to_processing_set import convert_msv2_to_processing_set

# Input MSv2 (downloaded above) and the path the processing set is written to.
msv2_name = "Antennae_North.cal.lsrk.split.ms"
convert_out = "Antennae_North.cal.lsrk.split.vis.zarr"

# overwrite=True: presumably replaces an existing output so the cell can be re-run.
convert_msv2_to_processing_set(in_file=msv2_name, out_file=convert_out, overwrite=True)

[2024-09-11 13:38:59,059]     INFO   toolviper:  Partition scheme that will be used: ['DATA_DESC_ID', 'OBS_MODE', 'OBSERVATION_ID', 'FIELD_ID']
[2024-09-11 13:38:59,141]     INFO   toolviper:  Number of partitions: 12
[2024-09-11 13:38:59,142]     INFO   toolviper:  OBSERVATION_ID [0], DDI [0], STATE [32 23 30 37], FIELD [0], SCAN [ 9 17 21 25]
[2024-09-11 13:38:59,293]     INFO   toolviper:  OBSERVATION_ID [0], DDI [0], STATE [33 24 31], FIELD [1], SCAN [ 9 17 21]
[2024-09-11 13:38:59,431]     INFO   toolviper:  OBSERVATION_ID [0], DDI [0], STATE [34 25 32], FIELD [2], SCAN [ 9 17 21]
[2024-09-11 13:38:59,622]     INF

## Lazy read PS

In [4]:
from xradio.vis.read_processing_set import read_processing_set

# Path of the processing set written by the conversion step.
convert_out = "Antennae_North.cal.lsrk.split.vis.zarr"
# Only MSv4s whose observing mode matches one of these entries are requested.
target_obs_modes = ["OBSERVE_TARGET#ON_SOURCE"]

ps = read_processing_set(convert_out, obs_modes=target_obs_modes)

In [5]:
# Tabulate the processing set: one row per MSv4 (name, obs_mode, shape, ...).
ps.summary()

Unnamed: 0,name,obs_mode,shape,polarization,scan_number,spw_name,field_name,source_name,line_name,field_coords,start_frequency,end_frequency
6,Antennae_North.cal.lsrk.split_00,[OBSERVE_TARGET#ON_SOURCE],"(20, 45, 8, 2)","[XX, YY]","[9, 17, 21, 25]",spw_0,[NGC4038 - Antennae North_0],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m52.43s, -18d52m02.92s]",343928100000.0,344006700000.0
4,Antennae_North.cal.lsrk.split_01,[OBSERVE_TARGET#ON_SOURCE],"(15, 45, 8, 2)","[XX, YY]","[9, 17, 21]",spw_0,[NGC4038 - Antennae North_1],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m52.96s, -18d52m02.92s]",343928100000.0,344006700000.0
11,Antennae_North.cal.lsrk.split_02,[OBSERVE_TARGET#ON_SOURCE],"(15, 45, 8, 2)","[XX, YY]","[9, 17, 21]",spw_0,[NGC4038 - Antennae North_2],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m53.49s, -18d52m02.92s]",343928100000.0,344006700000.0
1,Antennae_North.cal.lsrk.split_03,[OBSERVE_TARGET#ON_SOURCE],"(20, 55, 8, 2)","[XX, YY]","[26, 34, 38, 42]",spw_0,[NGC4038 - Antennae North_0],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m52.43s, -18d52m02.92s]",343928100000.0,344006700000.0
5,Antennae_North.cal.lsrk.split_04,[OBSERVE_TARGET#ON_SOURCE],"(15, 55, 8, 2)","[XX, YY]","[26, 34, 38]",spw_0,[NGC4038 - Antennae North_1],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m52.96s, -18d52m02.92s]",343928100000.0,344006700000.0
8,Antennae_North.cal.lsrk.split_05,[OBSERVE_TARGET#ON_SOURCE],"(15, 55, 8, 2)","[XX, YY]","[26, 34, 38]",spw_0,[NGC4038 - Antennae North_2],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m53.49s, -18d52m02.92s]",343928100000.0,344006700000.0
7,Antennae_North.cal.lsrk.split_06,[OBSERVE_TARGET#ON_SOURCE],"(5, 55, 8, 2)","[XX, YY]",[43],spw_0,[NGC4038 - Antennae North_0],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m52.43s, -18d52m02.92s]",343928100000.0,344006700000.0
10,Antennae_North.cal.lsrk.split_07,[OBSERVE_TARGET#ON_SOURCE],"(5, 55, 8, 2)","[XX, YY]",[43],spw_0,[NGC4038 - Antennae North_1],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m52.96s, -18d52m02.92s]",343928100000.0,344006700000.0
0,Antennae_North.cal.lsrk.split_08,[OBSERVE_TARGET#ON_SOURCE],"(5, 55, 8, 2)","[XX, YY]",[43],spw_0,[NGC4038 - Antennae North_2],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m53.49s, -18d52m02.92s]",343928100000.0,344006700000.0
9,Antennae_North.cal.lsrk.split_09,"[OBSERVE_TARGET#ON_SOURCE, CALIBRATE_WVR#ON_SO...","(20, 77, 8, 2)","[XX, YY]","[48, 56, 60, 64]",spw_0,[NGC4038 - Antennae North_0],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m52.43s, -18d52m02.92s]",343928100000.0,344006700000.0


## PS Structure

A processing set is simply a dictionary of MSv4s (one per observation, field, intent, spectral window - polarization...):

In [6]:
# Number of MSv4s held by the processing set.
len(ps)

12

In [7]:
# The keys are the MSv4 names (the `name` column of the summary table).
ps.keys()

dict_keys(['Antennae_North.cal.lsrk.split_08', 'Antennae_North.cal.lsrk.split_03', 'Antennae_North.cal.lsrk.split_11', 'Antennae_North.cal.lsrk.split_10', 'Antennae_North.cal.lsrk.split_01', 'Antennae_North.cal.lsrk.split_04', 'Antennae_North.cal.lsrk.split_00', 'Antennae_North.cal.lsrk.split_06', 'Antennae_North.cal.lsrk.split_05', 'Antennae_North.cal.lsrk.split_09', 'Antennae_North.cal.lsrk.split_07', 'Antennae_North.cal.lsrk.split_02'])

## MSv4


## Main dataset

We can take one of the items of the Processing Set to look into the contents of that MSv4. Every MSv4 represents the data as an xarray dataset, as in earlier CNGI prototypes. The data variables (visibilities, weights, flags, etc.) can be manipulated and used in computations using the xarray API.

In [8]:
# Pick one MSv4 out of the processing set by its name.
main_xds = ps["Antennae_North.cal.lsrk.split_00"]

### Coordinates

In [9]:
# Display the whole MSv4 dataset.
main_xds

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 1.58 kiB 1.58 kiB Shape (45,) (45,) Dask graph 1 chunks in 2 graph layers Data type",45  1,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 1.58 kiB 1.58 kiB Shape (45,) (45,) Dask graph 1 chunks in 2 graph layers Data type",45  1,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 160 B 160 B Shape (20,) (20,) Dask graph 1 chunks in 2 graph layers Data type int64 numpy.ndarray",20  1,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.03 kiB,7.03 kiB
Shape,"(20, 45)","(20, 45)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 7.03 kiB 7.03 kiB Shape (20, 45) (20, 45) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",45  20,

Unnamed: 0,Array,Chunk
Bytes,7.03 kiB,7.03 kiB
Shape,"(20, 45)","(20, 45)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,14.06 kiB,14.06 kiB
Shape,"(20, 45, 8, 2)","(20, 45, 8, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 14.06 kiB 14.06 kiB Shape (20, 45, 8, 2) (20, 45, 8, 2) Dask graph 1 chunks in 2 graph layers Data type bool numpy.ndarray",20  1  2  8  45,

Unnamed: 0,Array,Chunk
Bytes,14.06 kiB,14.06 kiB
Shape,"(20, 45, 8, 2)","(20, 45, 8, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.03 kiB,7.03 kiB
Shape,"(20, 45)","(20, 45)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 7.03 kiB 7.03 kiB Shape (20, 45) (20, 45) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",45  20,

Unnamed: 0,Array,Chunk
Bytes,7.03 kiB,7.03 kiB
Shape,"(20, 45)","(20, 45)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,21.09 kiB,21.09 kiB
Shape,"(20, 45, 3)","(20, 45, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 21.09 kiB 21.09 kiB Shape (20, 45, 3) (20, 45, 3) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",3  45  20,

Unnamed: 0,Array,Chunk
Bytes,21.09 kiB,21.09 kiB
Shape,"(20, 45, 3)","(20, 45, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,112.50 kiB,112.50 kiB
Shape,"(20, 45, 8, 2)","(20, 45, 8, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,complex64 numpy.ndarray,complex64 numpy.ndarray
"Array Chunk Bytes 112.50 kiB 112.50 kiB Shape (20, 45, 8, 2) (20, 45, 8, 2) Dask graph 1 chunks in 2 graph layers Data type complex64 numpy.ndarray",20  1  2  8  45,

Unnamed: 0,Array,Chunk
Bytes,112.50 kiB,112.50 kiB
Shape,"(20, 45, 8, 2)","(20, 45, 8, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,complex64 numpy.ndarray,complex64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,56.25 kiB,56.25 kiB
Shape,"(20, 45, 8, 2)","(20, 45, 8, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 56.25 kiB 56.25 kiB Shape (20, 45, 8, 2) (20, 45, 8, 2) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",20  1  2  8  45,

Unnamed: 0,Array,Chunk
Bytes,56.25 kiB,56.25 kiB
Shape,"(20, 45, 8, 2)","(20, 45, 8, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [10]:
# Polarization coordinate, accessed with attribute syntax (XX and YY for this MSv4, per the summary table).
main_xds.polarization

In [11]:
# uvw_label coordinate, accessed with attribute syntax like the other coordinates in this section.
main_xds.uvw_label

In [12]:
# Coordinates can also be looked up by name through the explicit `.coords` mapping.
main_xds.coords["baseline_id"]

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 1.58 kiB 1.58 kiB Shape (45,) (45,) Dask graph 1 chunks in 2 graph layers Data type",45  1,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 1.58 kiB 1.58 kiB Shape (45,) (45,) Dask graph 1 chunks in 2 graph layers Data type",45  1,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,


In [13]:
# Time coordinate of this MSv4.
main_xds.time

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 160 B 160 B Shape (20,) (20,) Dask graph 1 chunks in 2 graph layers Data type int64 numpy.ndarray",20  1,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray


### Data vars

In [14]:
# Visibility data variable: a Dask-backed (not yet loaded) complex64 array.
main_xds.VISIBILITY

Unnamed: 0,Array,Chunk
Bytes,112.50 kiB,112.50 kiB
Shape,"(20, 45, 8, 2)","(20, 45, 8, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,complex64 numpy.ndarray,complex64 numpy.ndarray
"Array Chunk Bytes 112.50 kiB 112.50 kiB Shape (20, 45, 8, 2) (20, 45, 8, 2) Dask graph 1 chunks in 2 graph layers Data type complex64 numpy.ndarray",20  1  2  8  45,

Unnamed: 0,Array,Chunk
Bytes,112.50 kiB,112.50 kiB
Shape,"(20, 45, 8, 2)","(20, 45, 8, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,complex64 numpy.ndarray,complex64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 1.58 kiB 1.58 kiB Shape (45,) (45,) Dask graph 1 chunks in 2 graph layers Data type",45  1,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 1.58 kiB 1.58 kiB Shape (45,) (45,) Dask graph 1 chunks in 2 graph layers Data type",45  1,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 160 B 160 B Shape (20,) (20,) Dask graph 1 chunks in 2 graph layers Data type int64 numpy.ndarray",20  1,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray


In [15]:
# Flag data variable: a boolean array with the same shape as VISIBILITY.
main_xds.FLAG

Unnamed: 0,Array,Chunk
Bytes,14.06 kiB,14.06 kiB
Shape,"(20, 45, 8, 2)","(20, 45, 8, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 14.06 kiB 14.06 kiB Shape (20, 45, 8, 2) (20, 45, 8, 2) Dask graph 1 chunks in 2 graph layers Data type bool numpy.ndarray",20  1  2  8  45,

Unnamed: 0,Array,Chunk
Bytes,14.06 kiB,14.06 kiB
Shape,"(20, 45, 8, 2)","(20, 45, 8, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 1.58 kiB 1.58 kiB Shape (45,) (45,) Dask graph 1 chunks in 2 graph layers Data type",45  1,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 1.58 kiB 1.58 kiB Shape (45,) (45,) Dask graph 1 chunks in 2 graph layers Data type",45  1,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 160 B 160 B Shape (20,) (20,) Dask graph 1 chunks in 2 graph layers Data type int64 numpy.ndarray",20  1,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray


In [16]:
# This only builds the reduction; the result is still a lazy Dask-backed array.
main_xds.VISIBILITY.max()

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,complex64 numpy.ndarray,complex64 numpy.ndarray
Array Chunk Bytes 8 B 8 B Shape () () Dask graph 1 chunks in 4 graph layers Data type complex64 numpy.ndarray,,

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,complex64 numpy.ndarray,complex64 numpy.ndarray


In [17]:
# .compute() evaluates the Dask graph and returns the loaded result.
main_xds.VISIBILITY.max().compute()
# Alternative that returns the plain NumPy value: main_xds.VISIBILITY.max().values

## Metadata

The MS metadata can be found in the attributes of the `main_xds`. Metadata is stored in different ways:
- in additional xarray sub-datasets, "sub-xds"
- in attributes of coordinates and data variables
- in Python dictionaries.

Most sub-xds are found in the attributes of the `main_xds`, but there are also sub-xds in the attributes of some data variables.
An example of a sub-xds of the `main_xds` is the antenna dataset (`antenna_xds`). An example of a dictionary is the partition info dict (`partition_info`).

### Metadata in sub-xds. Antenna dataset

The MSv4 has xarray datasets in its attributes that represent metadata that includes n-dimensional arrays. Some examples are the `antenna_xds`, `weather_xds` and `pointing_xds`. These are the equivalent of the subtables of the MSv2. Let's look into the antenna sub-xds:


In [18]:
# Sub-datasets of the main dataset are stored in its `attrs` dictionary.
ant_xds = main_xds.attrs["antenna_xds"]

In [19]:
# Display the antenna sub-dataset.
ant_xds

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 240 B 240 B Shape (10,) (10,) Dask graph 1 chunks in 2 graph layers Data type",10  1,

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,80 B,80 B
Shape,"(10, 2)","(10, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 80 B 80 B Shape (10, 2) (10, 2) Dask graph 1 chunks in 2 graph layers Data type",2  10,

Unnamed: 0,Array,Chunk
Bytes,80 B,80 B
Shape,"(10, 2)","(10, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 160 B 160 B Shape (10,) (10,) Dask graph 1 chunks in 2 graph layers Data type",10  1,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,80 B,80 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 80 B 80 B Shape (10,) (10,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",10  1,

Unnamed: 0,Array,Chunk
Bytes,80 B,80 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10, 3)","(10, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 240 B 240 B Shape (10, 3) (10, 3) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",3  10,

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10, 3)","(10, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10, 3)","(10, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 240 B 240 B Shape (10, 3) (10, 3) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",3  10,

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10, 3)","(10, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,320 B,320 B
Shape,"(10, 2, 2)","(10, 2, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 320 B 320 B Shape (10, 2, 2) (10, 2, 2) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2  2  10,

Unnamed: 0,Array,Chunk
Bytes,320 B,320 B
Shape,"(10, 2, 2)","(10, 2, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(10, 2)","(10, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 160 B 160 B Shape (10, 2) (10, 2) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2  10,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(10, 2)","(10, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


As an xarray dataset, the antenna sub-xds can be used via the same API as the main xds.

In [20]:
# Antenna positions: a lazily loaded array of shape (10, 3), i.e. three values per antenna.
ant_xds.ANTENNA_POSITION  # .values to load and see them

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10, 3)","(10, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 240 B 240 B Shape (10, 3) (10, 3) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",3  10,

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10, 3)","(10, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 240 B 240 B Shape (10,) (10,) Dask graph 1 chunks in 2 graph layers Data type",10  1,

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 160 B 160 B Shape (10,) (10,) Dask graph 1 chunks in 2 graph layers Data type",10  1,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,


In [21]:
# .values loads the data and returns it as a plain NumPy array.
ant_xds.antenna_name.values

array(['DV02_A015', 'DV06_T704', 'DV07_A004', 'DV08_A072', 'DV09_A008',
       'DV10_A009', 'DV11_A016', 'PM01_T702', 'PM02_A017', 'PM03_J504'],
      dtype='<U9')

In [22]:
# Beam offset data variable of the antenna sub-dataset.
ant_xds.BEAM_OFFSET

Unnamed: 0,Array,Chunk
Bytes,320 B,320 B
Shape,"(10, 2, 2)","(10, 2, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 320 B 320 B Shape (10, 2, 2) (10, 2, 2) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2  2  10,

Unnamed: 0,Array,Chunk
Bytes,320 B,320 B
Shape,"(10, 2, 2)","(10, 2, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 240 B 240 B Shape (10,) (10,) Dask graph 1 chunks in 2 graph layers Data type",10  1,

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,80 B,80 B
Shape,"(10, 2)","(10, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 80 B 80 B Shape (10, 2) (10, 2) Dask graph 1 chunks in 2 graph layers Data type",2  10,

Unnamed: 0,Array,Chunk
Bytes,80 B,80 B
Shape,"(10, 2)","(10, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 160 B 160 B Shape (10,) (10,) Dask graph 1 chunks in 2 graph layers Data type",10  1,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,


In [23]:
# Dish diameter data variable: one value per antenna.
ant_xds.ANTENNA_DISH_DIAMETER

Unnamed: 0,Array,Chunk
Bytes,80 B,80 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 80 B 80 B Shape (10,) (10,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",10  1,

Unnamed: 0,Array,Chunk
Bytes,80 B,80 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 240 B 240 B Shape (10,) (10,) Dask graph 1 chunks in 2 graph layers Data type",10  1,

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 160 B 160 B Shape (10,) (10,) Dask graph 1 chunks in 2 graph layers Data type",10  1,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,


### Attributes of Data Arrays and Coordinates. Quantities and Measures

All data variables and coordinates can have quantity and measures information in their attributes section along with other relevant metadata. These measures are specified as dictionaries in the attribute of the data variable or coordinate, with keys `units` and `type` in addition to other keys depending on the type of quantity. The naming conventions are based on `astropy`. For example a quantity of casacore/`position` type, such as the antenna positions, is a quantity with `type: "earth_location"`

For reference, this is the list of measures in the current Processing Set/MSv4 spec:
https://docs.google.com/spreadsheets/d/14a6qMap9M5r_vjpLnaBKxsR9TF4azN5LVdOxLacOX-s/edit#gid=1504318014, with naming conventions based on astropy. For example, a casacore `direction` is a `sky_coord`.



#### Time coordinate
The time coordinate is a time measure (keys: `type`, `units`, `scale`, `format`) but also contains, for example, `integration_time`, which is a quantity.

In [24]:
# Displaying the coordinate also shows the attributes that carry its measure information.
main_xds.time

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 160 B 160 B Shape (20,) (20,) Dask graph 1 chunks in 2 graph layers Data type int64 numpy.ndarray",20  1,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray


##### Quantities and measures that are not xarray

When a quantity or a measure is not an xarray, it is specified as a dictionary with a format based on xarray's [xarray.DataArray.from_dict()](https://docs.xarray.dev/en/stable/generated/xarray.DataArray.from_dict.html) and it has the following keys:
`{"dims": ..., "data": ..., "attrs": quantity/measures_dict}`. The `integration_time` attribute included in the attributes of the time coordinate is an example:

In [25]:
# Pretty-print the attribute dictionary of the time coordinate.
pprint.pprint(main_xds.time.attrs)

{'effective_integration_time': 'EFFECTIVE_INTEGRATION_TIME',
 'format': 'UNIX',
 'integration_time': {'attrs': {'type': 'quantity', 'units': ['s']},
                      'data': 6.048,
                      'dims': []},
 'scale': 'utc',
 'type': 'time',
 'units': ['s']}


#### Frequency coordinate

The `frequency` coordinate is a `spectral_coord` measure and as such has the following keys in its attributes: `type`, `units`, and `frame`. In addition, the attributes contain the `channel_width`, `spectral_window_name`, and `reference_frequency`.

Any metadata that is a quantity or measure (non-id numbers) is placed in the relevant measures or quantity dictionary.

In [26]:
# Frequency coordinate; its attributes carry the measure information.
main_xds.frequency

In the frequency coordinate we have examples of:
- quantity given as a dict: `channel_width`
- measure given as a dict: `reference_frequency` (a `spectral_coord` ~= casacore/frequency)

In [27]:
# Pretty-print the attribute dictionary of the frequency coordinate.
pprint.pprint(main_xds.frequency.attrs)

{'channel_width': {'attrs': {'type': 'quantity', 'units': ['Hz']},
                   'data': 11231488.981445312,
                   'dims': []},
 'frame': 'LSRK',
 'reference_frequency': {'attrs': {'frame': 'LSRK',
                                   'type': 'spectral_coord',
                                   'units': ['Hz']},
                         'data': 343928096685.9587,
                         'dims': []},
 'spectral_window_id': 0,
 'spectral_window_name': 'spw_0',
 'type': 'spectral_coord',
 'units': ['Hz']}


### Metadata in dictionaries. Partition info.

The MSv4 also allows for info dictionaries in the attribute section of the dataset. This is used when no n-dimensional data is required. The relevant measures metadata is included, similarly as with coordinates and data variables (when non-id) in xarray datasets.

An example is the `partition_info` dict, which describes the data partition of the input used to produce an MSv4.

An example is the field_info where the delay_direction, phase_direction, and reference_direction are stored as `sky_coord` measures (keys: `type`, `units`, `reference_frame`).

In [28]:
# Info dictionaries stored in the dataset attributes can be reached with attribute syntax.
main_xds.partition_info

{'field_name': ['NGC4038 - Antennae North_0'],
 'line_name': [],
 'num_lines': 0,
 'obs_mode': ['OBSERVE_TARGET#ON_SOURCE'],
 'polarization_setup': ['XX', 'YY'],
 'scan_number': [9, 17, 21, 25],
 'source_name': ['NGC4038 - Antennae North_0'],
 'spectral_window_name': 'spw_0',
 'taql': 'WHERE (DATA_DESC_ID IN [0]) AND(OBSERVATION_ID IN [0]) AND(STATE_ID IN [32,23,30,37]) AND(FIELD_ID IN [0]) AND(SCAN_NUMBER IN [9,17,21,25]) AND(STATE_ID IN [32,23,30,37]) '}

#### Other info dicts

Besides the `partition_info` dict, every MSv4 has general metadata in the `observation_info` and `processor_info` dictionaries:

In [29]:
# General observation metadata, stored as a plain dictionary.
main_xds.observation_info

{'observer': ['Unknown'],
 'project': 'T.B.D.',
 'release_date': '1858-11-17T00:00:00.000000000'}

In [30]:
# Processor metadata, stored as a plain dictionary.
main_xds.processor_info

{'sub_type': 'ALMA_CORRELATOR_MODE', 'type': 'CORRELATOR'}

### Metadata in sub-xds of data variables. Field_and_source sub-dataset.

A special example of a sub-xds is the `field_and_source_xds`, which is included in the attributes of the VISIBILITY data variable. This way, transformations applied on the visibilities can be reflected in variables such as the field phase center or the source location. Here data variables such as `FIELD_PHASE_CENTER` or `SOURCE_LOCATION` are stored as `sky_coord` measures (their attributes contain the following keys: `type`, `units`, `frame`).

In [31]:
# This sub-dataset is attached to the VISIBILITY data variable rather than to the main dataset.
field_and_source_xds = main_xds.VISIBILITY.field_and_source_xds

In [32]:
# Display the field-and-source sub-dataset.
field_and_source_xds

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16 B 16 B Shape (2,) (2,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16 B 16 B Shape (2,) (2,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [33]:
# Field phase center data variable (a 2-element array).
field_and_source_xds.FIELD_PHASE_CENTER

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16 B 16 B Shape (2,) (2,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [34]:
# Source location data variable (a 2-element array).
field_and_source_xds.SOURCE_LOCATION

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16 B 16 B Shape (2,) (2,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


## Selection examples

One can use the usual selection functionality of xarray with all arrays, the main dataset and all sub datasets. For example, selection by labels, `sel()`:

In [35]:
# Label-based selection: the bounds are frequency values in Hz, and a label
# slice includes both end points.
freq_window_hz = slice(3.43939e11, 3.4397e11)
sel_xds = main_xds.sel(frequency=freq_window_hz)
sel_xds.frequency

Or selection by indices, `isel()`:

In [36]:
# Index-based selection: a positional slice excludes its stop, so this keeps
# the channels at indices 1, 2 and 3.
channel_range = slice(1, 4)
isel_xds = main_xds.isel(frequency=channel_range)
isel_xds.frequency

In [37]:
# equals(): both selections hold the same variables and coordinates with equal values.
sel_xds.equals(isel_xds)

True

In [38]:
# identical(): like equals(), but also compares the attributes.
sel_xds.identical(isel_xds)

True