## Import xradio

In [1]:
import os, pprint
import subprocess
import sys
from importlib.metadata import version

try:
    # Upgrade xradio with the pip that belongs to the interpreter running this
    # kernel. A bare "pip" looked up on PATH may install into a different
    # environment than the one this notebook imports from.
    # check=False keeps this best-effort: a failed install (e.g. no network)
    # does not abort the cell, and any already-installed xradio is used.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "--upgrade", "xradio"],
        check=False,
    )

    # NOTE: if xradio was already imported in this kernel session, restart the
    # kernel after an upgrade so the new version is actually loaded.
    import xradio

    # Report the installed distribution version, as recorded in package metadata.
    print("Using xradio version", version("xradio"))

except ImportError as exc:
    # Only import failures are handled here; the rest of the notebook needs xradio.
    print(f"Could not import xradio: {exc}")

Using xradio version 0.0.40


## Download example MSv2

# Preparation

In [2]:
from graphviper.utils.data import download

# Name of the example MSv2 used throughout this notebook. The download helper
# logs "File exists: ..." when a local copy is already present.
msv2_name = "Antennae_North.cal.lsrk.ms"
download(file=msv2_name)

[[38;2;128;05;128m2024-09-12 18:07:41,313[0m] [38;2;50;50;205m    INFO[0m[38;2;112;128;144m   toolviper: [0m Updating file metadata information ...  


 

[[38;2;128;05;128m2024-09-12 18:07:42,195[0m] [38;2;50;50;205m    INFO[0m[38;2;112;128;144m   toolviper: [0m File exists: Antennae_North.cal.lsrk.ms 


# Processing Set

## Convert MSv2 => Processing Set (PS)

In [3]:
from xradio.vis.convert_msv2_to_processing_set import convert_msv2_to_processing_set

# Input MSv2 and the output store that will hold the processing set.
msv2_name = "Antennae_North.cal.lsrk.ms"
convert_out = "Antennae_North.cal.lsrk.vis.zarr"

# The conversion log reports the partition scheme and one line per partition.
# overwrite=True presumably replaces an existing output store, which would make
# the cell safe to re-run -- confirm against the xradio docs.
convert_msv2_to_processing_set(in_file=msv2_name, out_file=convert_out, overwrite=True)

[[38;2;128;05;128m2024-09-12 18:07:43,499[0m] [38;2;50;50;205m    INFO[0m[38;2;112;128;144m   toolviper: [0m Partition scheme that will be used: ['DATA_DESC_ID', 'OBS_MODE', 'OBSERVATION_ID', 'FIELD_ID'] 
[[38;2;128;05;128m2024-09-12 18:07:43,681[0m] [38;2;50;50;205m    INFO[0m[38;2;112;128;144m   toolviper: [0m Number of partitions: 92 
[[38;2;128;05;128m2024-09-12 18:07:43,681[0m] [38;2;50;50;205m    INFO[0m[38;2;112;128;144m   toolviper: [0m OBSERVATION_ID [0], DDI [0], STATE [22 29 36 27], FIELD [1], SCAN [ 9 13 17 25] 
[[38;2;128;05;128m2024-09-12 18:07:43,856[0m] [38;2;50;50;205m    INFO[0m[38;2;112;128;144m   toolviper: [0m OBSERVATION_ID [0], DDI [0], STATE [23 30 37 28], FIELD [2], SCAN [ 9 13 17 25] 
[[38;2;128;05;128m2024-09-12 18:07:44,038[0m] [38;2;50;50;205m    INFO[0m[38;2;112;128;144m   toolviper: [0m OBSERVATION_ID [0], DDI [0], STATE [24 31 22 29], FIELD [3], SCAN [ 9 13 21 25] 
[[38;2;128;05;128m2024-09-12 18:07:44,508[0m] [38;2;50;50

## Lazy read PS

In [4]:
from xradio.vis.read_processing_set import read_processing_set

# Store written by the conversion cell above.
convert_out = "Antennae_North.cal.lsrk.vis.zarr"
# obs_modes is only used by the commented-out call below, which passes it as
# the `obs_modes` argument. The active call reads the processing set without it.
obs_modes = ["OBSERVE_TARGET#ON_SOURCE"]
# ps = read_processing_set(convert_out, obs_modes=obs_modes)
ps = read_processing_set(convert_out)

In [5]:
# Tabular overview with one row per MSv4: name, obs_mode, shape, polarization,
# scan numbers, spectral window, field/source names and frequency range.
ps.summary()

Unnamed: 0,name,obs_mode,shape,polarization,scan_number,spw_name,field_name,source_name,line_name,field_coords,start_frequency,end_frequency
48,Antennae_North.cal.lsrk_00,[OBSERVE_TARGET#ON_SOURCE],"(20, 45, 166, 2)","[XX, YY]","[9, 13, 17, 25]",spw_0,[NGC4038 - Antennae North_1],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m51.90s, -18d51m49.94s]",3.430183e+11,3.448715e+11
60,Antennae_North.cal.lsrk_01,[OBSERVE_TARGET#ON_SOURCE],"(20, 45, 166, 2)","[XX, YY]","[9, 13, 17, 25]",spw_0,[NGC4038 - Antennae North_2],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m52.43s, -18d51m49.94s]",3.430183e+11,3.448715e+11
19,Antennae_North.cal.lsrk_02,[OBSERVE_TARGET#ON_SOURCE],"(20, 45, 166, 2)","[XX, YY]","[9, 13, 21, 25]",spw_0,[NGC4038 - Antennae North_3],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m52.96s, -18d51m49.94s]",3.430183e+11,3.448715e+11
6,Antennae_North.cal.lsrk_03,[OBSERVE_TARGET#ON_SOURCE],"(20, 45, 166, 2)","[XX, YY]","[9, 13, 21, 25]",spw_0,[NGC4038 - Antennae North_4],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m53.49s, -18d51m49.94s]",3.430183e+11,3.448715e+11
7,Antennae_North.cal.lsrk_04,[OBSERVE_TARGET#ON_SOURCE],"(20, 45, 166, 2)","[XX, YY]","[9, 13, 21, 25]",spw_0,[NGC4038 - Antennae North_5],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m54.01s, -18d51m49.94s]",3.430183e+11,3.448715e+11
...,...,...,...,...,...,...,...,...,...,...,...,...
30,Antennae_North.cal.lsrk_87,"[OBSERVE_TARGET#ON_SOURCE, CALIBRATE_WVR#ON_SO...","(15, 77, 166, 2)","[XX, YY]","[52, 56, 64]",spw_0,[NGC4038 - Antennae North_19],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m51.90s, -18d52m15.90s]",3.430183e+11,3.448715e+11
44,Antennae_North.cal.lsrk_88,"[OBSERVE_TARGET#ON_SOURCE, CALIBRATE_WVR#ON_SO...","(15, 77, 166, 2)","[XX, YY]","[52, 56, 64]",spw_0,[NGC4038 - Antennae North_20],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m52.43s, -18d52m15.90s]",3.430183e+11,3.448715e+11
26,Antennae_North.cal.lsrk_89,"[OBSERVE_TARGET#ON_SOURCE, CALIBRATE_WVR#ON_SO...","(15, 77, 166, 2)","[XX, YY]","[52, 56, 64]",spw_0,[NGC4038 - Antennae North_21],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m52.96s, -18d52m15.90s]",3.430183e+11,3.448715e+11
67,Antennae_North.cal.lsrk_90,"[OBSERVE_TARGET#ON_SOURCE, CALIBRATE_WVR#ON_SO...","(15, 77, 166, 2)","[XX, YY]","[52, 56, 64]",spw_0,[NGC4038 - Antennae North_22],[NGC4038 - Antennae North_0],[],"[fk5, 12h01m53.49s, -18d52m15.90s]",3.430183e+11,3.448715e+11


## PS Structure

A processing set is simply a dictionary of MSv4s (one per observation, field, intent, spectral window - polarization...):

In [6]:
# Number of MSv4s in the processing set (matches the partition count logged by the conversion).
len(ps)

92

In [7]:
# MSv4 names; these are the keys used to index the processing set.
ps.keys()

dict_keys(['Antennae_North.cal.lsrk_61', 'Antennae_North.cal.lsrk_66', 'Antennae_North.cal.lsrk_59', 'Antennae_North.cal.lsrk_50', 'Antennae_North.cal.lsrk_57', 'Antennae_North.cal.lsrk_68', 'Antennae_North.cal.lsrk_03', 'Antennae_North.cal.lsrk_04', 'Antennae_North.cal.lsrk_32', 'Antennae_North.cal.lsrk_35', 'Antennae_North.cal.lsrk_56', 'Antennae_North.cal.lsrk_69', 'Antennae_North.cal.lsrk_51', 'Antennae_North.cal.lsrk_67', 'Antennae_North.cal.lsrk_58', 'Antennae_North.cal.lsrk_60', 'Antennae_North.cal.lsrk_34', 'Antennae_North.cal.lsrk_33', 'Antennae_North.cal.lsrk_05', 'Antennae_North.cal.lsrk_02', 'Antennae_North.cal.lsrk_20', 'Antennae_North.cal.lsrk_27', 'Antennae_North.cal.lsrk_18', 'Antennae_North.cal.lsrk_11', 'Antennae_North.cal.lsrk_16', 'Antennae_North.cal.lsrk_29', 'Antennae_North.cal.lsrk_89', 'Antennae_North.cal.lsrk_42', 'Antennae_North.cal.lsrk_45', 'Antennae_North.cal.lsrk_73', 'Antennae_North.cal.lsrk_87', 'Antennae_North.cal.lsrk_80', 'Antennae_North.cal.lsrk_74',

# MSv4


## Main dataset

We can take one of the items of the Processing Set to look into the contents of that MSv4. Every MSv4 represents the data as an xarray dataset, as in the earlier CNGI prototypes. The data variables (visibilities, weights, flags, etc.) can be manipulated and used in computations using the xarray API.

In [8]:
# Pick one MSv4 out of the processing set by its name (see ps.keys()).
main_xds = ps["Antennae_North.cal.lsrk_01"]

In [9]:
# Rich repr of the whole dataset; the arrays listed are dask-backed (not yet loaded).
main_xds

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 1.58 kiB 1.58 kiB Shape (45,) (45,) Dask graph 1 chunks in 2 graph layers Data type",45  1,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 1.58 kiB 1.58 kiB Shape (45,) (45,) Dask graph 1 chunks in 2 graph layers Data type",45  1,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 160 B 160 B Shape (20,) (20,) Dask graph 1 chunks in 2 graph layers Data type int64 numpy.ndarray",20  1,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.03 kiB,7.03 kiB
Shape,"(20, 45)","(20, 45)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 7.03 kiB 7.03 kiB Shape (20, 45) (20, 45) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",45  20,

Unnamed: 0,Array,Chunk
Bytes,7.03 kiB,7.03 kiB
Shape,"(20, 45)","(20, 45)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,291.80 kiB,291.80 kiB
Shape,"(20, 45, 166, 2)","(20, 45, 166, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 291.80 kiB 291.80 kiB Shape (20, 45, 166, 2) (20, 45, 166, 2) Dask graph 1 chunks in 2 graph layers Data type bool numpy.ndarray",20  1  2  166  45,

Unnamed: 0,Array,Chunk
Bytes,291.80 kiB,291.80 kiB
Shape,"(20, 45, 166, 2)","(20, 45, 166, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.03 kiB,7.03 kiB
Shape,"(20, 45)","(20, 45)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 7.03 kiB 7.03 kiB Shape (20, 45) (20, 45) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",45  20,

Unnamed: 0,Array,Chunk
Bytes,7.03 kiB,7.03 kiB
Shape,"(20, 45)","(20, 45)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,21.09 kiB,21.09 kiB
Shape,"(20, 45, 3)","(20, 45, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 21.09 kiB 21.09 kiB Shape (20, 45, 3) (20, 45, 3) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",3  45  20,

Unnamed: 0,Array,Chunk
Bytes,21.09 kiB,21.09 kiB
Shape,"(20, 45, 3)","(20, 45, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.28 MiB,2.28 MiB
Shape,"(20, 45, 166, 2)","(20, 45, 166, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,complex64 numpy.ndarray,complex64 numpy.ndarray
"Array Chunk Bytes 2.28 MiB 2.28 MiB Shape (20, 45, 166, 2) (20, 45, 166, 2) Dask graph 1 chunks in 2 graph layers Data type complex64 numpy.ndarray",20  1  2  166  45,

Unnamed: 0,Array,Chunk
Bytes,2.28 MiB,2.28 MiB
Shape,"(20, 45, 166, 2)","(20, 45, 166, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,complex64 numpy.ndarray,complex64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.14 MiB,1.14 MiB
Shape,"(20, 45, 166, 2)","(20, 45, 166, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.14 MiB 1.14 MiB Shape (20, 45, 166, 2) (20, 45, 166, 2) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",20  1  2  166  45,

Unnamed: 0,Array,Chunk
Bytes,1.14 MiB,1.14 MiB
Shape,"(20, 45, 166, 2)","(20, 45, 166, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


#### Coordinates

In [10]:
# Coordinates can be accessed as attributes of the dataset.
main_xds.polarization

In [11]:
# Presumably the labels of the length-3 UVW axis -- confirm against the MSv4 schema.
main_xds.uvw_label

In [12]:
# Equivalent lookup through the explicit .coords mapping instead of attribute access.
main_xds.coords["baseline_id"]

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 1.58 kiB 1.58 kiB Shape (45,) (45,) Dask graph 1 chunks in 2 graph layers Data type",45  1,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 1.58 kiB 1.58 kiB Shape (45,) (45,) Dask graph 1 chunks in 2 graph layers Data type",45  1,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,


In [13]:
# Time coordinate of this MSv4 (20 time steps in this dataset).
main_xds.time

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 160 B 160 B Shape (20,) (20,) Dask graph 1 chunks in 2 graph layers Data type int64 numpy.ndarray",20  1,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray


#### Data vars

In [14]:
# Visibility data variable: a complex64, dask-backed array of shape (20, 45, 166, 2)
# in this MSv4. Displaying it does not load the data.
main_xds.VISIBILITY

Unnamed: 0,Array,Chunk
Bytes,2.28 MiB,2.28 MiB
Shape,"(20, 45, 166, 2)","(20, 45, 166, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,complex64 numpy.ndarray,complex64 numpy.ndarray
"Array Chunk Bytes 2.28 MiB 2.28 MiB Shape (20, 45, 166, 2) (20, 45, 166, 2) Dask graph 1 chunks in 2 graph layers Data type complex64 numpy.ndarray",20  1  2  166  45,

Unnamed: 0,Array,Chunk
Bytes,2.28 MiB,2.28 MiB
Shape,"(20, 45, 166, 2)","(20, 45, 166, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,complex64 numpy.ndarray,complex64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 1.58 kiB 1.58 kiB Shape (45,) (45,) Dask graph 1 chunks in 2 graph layers Data type",45  1,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 1.58 kiB 1.58 kiB Shape (45,) (45,) Dask graph 1 chunks in 2 graph layers Data type",45  1,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 160 B 160 B Shape (20,) (20,) Dask graph 1 chunks in 2 graph layers Data type int64 numpy.ndarray",20  1,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray


In [15]:
# Boolean flag data variable with the same shape as VISIBILITY.
main_xds.FLAG

Unnamed: 0,Array,Chunk
Bytes,291.80 kiB,291.80 kiB
Shape,"(20, 45, 166, 2)","(20, 45, 166, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 291.80 kiB 291.80 kiB Shape (20, 45, 166, 2) (20, 45, 166, 2) Dask graph 1 chunks in 2 graph layers Data type bool numpy.ndarray",20  1  2  166  45,

Unnamed: 0,Array,Chunk
Bytes,291.80 kiB,291.80 kiB
Shape,"(20, 45, 166, 2)","(20, 45, 166, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 1.58 kiB 1.58 kiB Shape (45,) (45,) Dask graph 1 chunks in 2 graph layers Data type",45  1,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 1.58 kiB 1.58 kiB Shape (45,) (45,) Dask graph 1 chunks in 2 graph layers Data type",45  1,

Unnamed: 0,Array,Chunk
Bytes,1.58 kiB,1.58 kiB
Shape,"(45,)","(45,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 160 B 160 B Shape (20,) (20,) Dask graph 1 chunks in 2 graph layers Data type int64 numpy.ndarray",20  1,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray


In [16]:
# Reductions on dask-backed variables are lazy: this only extends the task graph
# and returns a 0-d array without computing anything.
# NOTE(review): VISIBILITY is complex64; NumPy orders complex values
# lexicographically (real part, then imaginary), so this is not the largest
# amplitude. Reduce over abs(...) if amplitude is intended.
main_xds.VISIBILITY.max()

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,complex64 numpy.ndarray,complex64 numpy.ndarray
Array Chunk Bytes 8 B 8 B Shape () () Dask graph 1 chunks in 4 graph layers Data type complex64 numpy.ndarray,,

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Dask graph,1 chunks in 4 graph layers,1 chunks in 4 graph layers
Data type,complex64 numpy.ndarray,complex64 numpy.ndarray


In [17]:
# compute() executes the lazy reduction and returns the result loaded in memory.
main_xds.VISIBILITY.max().compute()
# Alternative that also triggers the computation and returns the NumPy value:
# main_xds.VISIBILITY.max().values

## Metadata

The MS metadata can be found in the attributes of the main_xds. Metadata is stored in different ways:
- in additional xarray (sub)datasets, "sub-xds"
- in attributes of coordinates and data variables
- in Python dictionaries.

An example of a sub-xds is the antenna dataset. An example of a dictionary is the Field info dict.

### Metadata in sub-xds. Antenna dataset

The MSv4 has xarray datasets in its attributes that represent metadata that includes n-dimensional arrays. These are the equivalent of the subtables of the MSv2. Let's look into the antenna sub-xds:


In [18]:
# Sub-datasets are stored in the attrs dict of the main dataset, keyed by name.
ant_xds = main_xds.attrs["antenna_xds"]

In [19]:
# Rich repr of the antenna sub-dataset (10 antennas in this dataset).
ant_xds

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 240 B 240 B Shape (10,) (10,) Dask graph 1 chunks in 2 graph layers Data type",10  1,

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,80 B,80 B
Shape,"(10, 2)","(10, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 80 B 80 B Shape (10, 2) (10, 2) Dask graph 1 chunks in 2 graph layers Data type",2  10,

Unnamed: 0,Array,Chunk
Bytes,80 B,80 B
Shape,"(10, 2)","(10, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 160 B 160 B Shape (10,) (10,) Dask graph 1 chunks in 2 graph layers Data type",10  1,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,80 B,80 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 80 B 80 B Shape (10,) (10,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",10  1,

Unnamed: 0,Array,Chunk
Bytes,80 B,80 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10, 3)","(10, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 240 B 240 B Shape (10, 3) (10, 3) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",3  10,

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10, 3)","(10, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10, 3)","(10, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 240 B 240 B Shape (10, 3) (10, 3) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",3  10,

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10, 3)","(10, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,320 B,320 B
Shape,"(10, 2, 2)","(10, 2, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 320 B 320 B Shape (10, 2, 2) (10, 2, 2) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2  2  10,

Unnamed: 0,Array,Chunk
Bytes,320 B,320 B
Shape,"(10, 2, 2)","(10, 2, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(10, 2)","(10, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 160 B 160 B Shape (10, 2) (10, 2) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2  10,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(10, 2)","(10, 2)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


As an xarray dataset, the antenna sub-xds can be used via the same API as the main xds.

In [20]:
# Dask-backed (10, 3) data variable; append `.values` to load and see the positions.
ant_xds.ANTENNA_POSITION

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10, 3)","(10, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 240 B 240 B Shape (10, 3) (10, 3) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",3  10,

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10, 3)","(10, 3)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 240 B 240 B Shape (10,) (10,) Dask graph 1 chunks in 2 graph layers Data type",10  1,

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 160 B 160 B Shape (10,) (10,) Dask graph 1 chunks in 2 graph layers Data type",10  1,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(10,)","(10,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,


### Attributes of Data Arrays and Coordinates. Quantities and Measures

All data variables and coordinates can have quantity and measures information in their attributes section along with other relevant metadata. These measures are specified as dictionaries in the attribute of the data variable or coordinate, with keys `units` and `type` in addition to other keys depending on the type of quantity. The naming conventions are based on `astropy`. For example a quantity of casacore/`position` type, such as the antenna positions, is a quantity with `type: "earth_location"`

For reference, this is the list of measures in the current Processing Set/MSv4 spec:
https://docs.google.com/spreadsheets/d/14a6qMap9M5r_vjpLnaBKxsR9TF4azN5LVdOxLacOX-s/edit#gid=1504318014, with naming conventions based on astropy. For example, a casacore `direction` is a `sky_coord`.



#### Time coordinate
The time coordinate is a time measure (keys: `type`, `units`, `scale`, `format`) but also contains, for example, `integration_time`, which is a quantity.

In [21]:
# Display the time coordinate again; its attrs (printed in a later cell) carry the measure metadata.
main_xds.time

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 160 B 160 B Shape (20,) (20,) Dask graph 1 chunks in 2 graph layers Data type int64 numpy.ndarray",20  1,

Unnamed: 0,Array,Chunk
Bytes,160 B,160 B
Shape,"(20,)","(20,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray


##### Quantities and measures that are not xarray

When a quantity or a measure is not an xarray, it is specified as a dictionary with a format based on xarray's [xarray.DataArray.from_dict()](https://docs.xarray.dev/en/stable/generated/xarray.DataArray.from_dict.html) and it has the following keys:
`{"dims": ..., "data": ..., "attrs": quantity/measures_dict}`. The `integration_time` attribute included in the attributes of the time coordinate is an example:

In [22]:
# pprint (imported in the first cell) lays out the nested attrs dict one key per line.
pprint.pprint(main_xds.time.attrs)

{'effective_integration_time': 'EFFECTIVE_INTEGRATION_TIME',
 'format': 'UNIX',
 'integration_time': {'attrs': {'type': 'quantity', 'units': ['s']},
                      'data': 6.048,
                      'dims': []},
 'scale': 'utc',
 'type': 'time',
 'units': ['s']}


#### Frequency coordinate

The `frequency` coordinate is a `spectral_coord` measure and as such has the following keys in its attributes: `type`, `units`, and `frame`. In addition, the attributes contain the `channel_width`, `spectral_window_name`, and `reference_frequency`.

Any metadata that is a quantity or measure (non-id numbers) is placed in the relevant measures or quantity dictionary.

In [23]:
# Frequency coordinate; its attrs are printed in a later cell.
main_xds.frequency

In the frequency coordinate we have example of:
- quantity given as a dict: `channel_width`
- measure given as a dict: `reference_frequency` (a `spectral_coord` ~= casacore/frequency)

In [24]:
# Nested dicts in the output follow the {"dims", "data", "attrs"} layout:
# channel_width is a quantity, reference_frequency a spectral_coord measure.
pprint.pprint(main_xds.frequency.attrs)

{'channel_width': {'attrs': {'type': 'quantity', 'units': ['Hz']},
                   'data': 11231488.981445312,
                   'dims': []},
 'frame': 'LSRK',
 'reference_frequency': {'attrs': {'frame': 'LSRK',
                                   'type': 'spectral_coord',
                                   'units': ['Hz']},
                         'data': 344871541760.4001,
                         'dims': []},
 'spectral_window_id': 0,
 'spectral_window_name': 'spw_0',
 'type': 'spectral_coord',
 'units': ['Hz']}


### Metadata in dicts. Field info.

The MSv4 also allows for info dictionaries in the attribute section of the dataset. This is used when no n-dimensional data is required. The relevant measures metadata is included, similarly as with coordinates and data variables (when non-id) in xarray datasets.

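An example is the field_info where the delay_direction, phase_direction, and reference_direction are stored as `sky_coord` measures (keys: `type`, `units`, `reference_frame`). In the xradio version used in this notebook, the field and source metadata is accessed as the `field_and_source_xds` attribute of the `VISIBILITY` data variable, as shown in the next cell.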

In [25]:
# Field/source metadata attached to the VISIBILITY data variable; the output
# shows that it holds dask-backed arrays rather than a plain dict.
main_xds.VISIBILITY.field_and_source_xds

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16 B 16 B Shape (2,) (2,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 16 B 16 B Shape (2,) (2,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,16 B,16 B
Shape,"(2,)","(2,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


## Selection examples

One can use the usual selection functionality of xarray with all arrays, the main dataset and all sub datasets. For example, selection by labels, `sel()`:

In [26]:
# Label-based selection: keep the channels whose frequency value lies inside
# the slice bounds. The bounds are in Hz (the frequency coordinate's units),
# i.e. roughly 343.939-343.970 GHz.
sel_xds = main_xds.sel(frequency=slice(3.43939e11, 3.4397e11))
sel_xds.frequency

Or selection by indices, `isel()`

In [27]:
# Position-based selection: channels at integer indices 1, 2 and 3 (the stop
# index 4 is excluded), regardless of their frequency values.
isel_xds = main_xds.isel(frequency=slice(1, 4))
isel_xds.frequency

In [28]:
# equals() compares dimensions, coordinates and values. It is False here
# because the two selections cover different channels: the label window used
# for sel_xds lies mid-band, while indices 1-3 sit at the edge of the band
# (band limits are listed by ps.summary()).
sel_xds.equals(isel_xds)

False

In [29]:
# identical() is stricter than equals(): it also compares names and attributes,
# so it is necessarily False whenever equals() is False.
sel_xds.identical(isel_xds)

False