Skip to content
This repository has been archived by the owner on Feb 7, 2024. It is now read-only.

Commit

Permalink
read_ms fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
ryanraba committed May 16, 2021
1 parent 5ae8a6b commit a76790c
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 11 deletions.
3 changes: 3 additions & 0 deletions cngi/_utils/_table_conversion2.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ def read_main_table(infile, subsel=0, ignore=None, chunks=(400, 200, 100, 2)):

# select just the specified ddi
tb_tool = tables.taql('select * from %s where DATA_DESC_ID = %i' % (infile, subsel))
if tb_tool.nrows() == 0:
tb_tool.close()
return xarray.Dataset()

# main table uses time x (antenna1,antenna2)
ant1, ant2 = tb_tool.getcol('ANTENNA1',0,-1), tb_tool.getcol('ANTENNA2', 0, -1)
Expand Down
20 changes: 9 additions & 11 deletions cngi/conversion/read_ms.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,36 +18,34 @@
"""


def read_ms(infile, ddis=None, ignore=['HISTORY'], chunks=(400, 400, 64, 2)):
def read_ms(infile, ddis=None, ignore=None, chunks=(400, 400, 64, 2)):
"""
Convert legacy format MS to xarray Visibility Dataset and zarr storage format
The CASA MSv2 format is converted to the MSv3 schema per the
specified definition here: https://drive.google.com/file/d/10TZ4dsFw9CconBc-GFxSeb2caT6wkmza/view?usp=sharing
The MS is partitioned by DDI, which guarantees a fixed data shape per partition. This results in different subdirectories
under the main vis.zarr folder. There is no DDI in MSv3, so this simply serves as a partition id in the zarr directory.
The MS is partitioned by DDI, which guarantees a fixed data shape per partition. This results in separate xarray
dataset (xds) partitions contained within a main xds (mxds). There is no DDI in MSv3, so this simply serves as
a partition id for each xds.
Parameters
----------
infile : str
Input MS filename
outfile : str
Output zarr filename when conversion is desired. Default None reads MS directly to xarray without conversion
ddis : list
List of specific DDIs to convert. DDIs are integer values, or use 'global' string for subtables. Leave as None to convert entire MS
ignore : list
List of subtables to ignore (case sensitive and generally all uppercase). This is useful if a particular subtable is causing errors.
Default is None. Note: default is now temporarily set to ignore the HISTORY table due to a CASA6 issue in the table tool affecting a small
set of test cases (set back to None if HISTORY is needed)
List of subtables to ignore (case sensitive and generally all uppercase). This is useful if a particular subtable is causing errors
or is very large and slowing down reads. Default is None
chunks : 4-D tuple of ints
Shape of desired chunking in the form of (time, baseline, channel, polarization), use -1 for entire axis in one chunk. Default is (100, 400, 20, 1)
Note: chunk size is the product of the four numbers, and data is batch processed by time axis, so that will drive memory needed for conversion.
Shape of desired chunking in the form of (time, baseline, channel, polarization). Larger values reduce the number of chunks and
speed up the reads at the cost of more memory. Chunk size is the product of the four numbers. Default is (400, 400, 64, 2)
Returns
-------
xarray.core.dataset.Dataset
Master xarray dataset of datasets for this visibility set
Main xarray dataset of datasets for this visibility set
"""
import os
import xarray
Expand Down
5 changes: 5 additions & 0 deletions tests/test_ms_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,8 @@ def compare_xds(xds1, xds2):
#xds = read_ms('data/uid___A002_Xc3032e_X27c3.ms', ddis=[25], chunks=(600,100,400,2)).xds25
print('read complete in %s seconds' % str(time.time()-start))
write_vis(mxds, 'data/test_conversion.vis.zarr')


start = time.time()
mxds = read_ms(infile, chunks=(1000,64,1000,2))
print('read complete in %s seconds' % str(time.time()-start))

0 comments on commit a76790c

Please sign in to comment.