## Jupyter notebook to demonstrate the basic calibration setup

This notebook runs the following tasks:
* Create a test Visibility dataset.
   * No visibility sample noise is added; this just tests that everything works exactly as expected.
   * Using the GLEAM sky model and a common everybeam station beam model.
   * Add complex Gaussian noise corruptions to X and Y station gains.
* Save the dataset as a MS so that it can be read back in.
* Read it back in sub-bands.
* Some pre-processing.
* Predict model visibilities (using GLEAM and everybeam).
* Do bandpass calibration.
* Use dask to handle processing of different frequency sub-bands.

This notebook requires:
* The packages imported in the first code cell (see imports).

#### Note: a few lines in the installed xarray source files need to be commented out for this notebook to run!
* xarray/core/parallel.py: line 194
* xarray/core/indexes.py: conditional starting at line 1400

In [1]:
# Demonstration of basic calibration

# pylint cannot seem to handle astropy units
# pylint: disable=no-member

import logging
import sys

# Imports
import warnings

import numpy as np
from astropy import units
from astropy.coordinates import Angle, SkyCoord
from dask.distributed import Client, LocalCluster
from ska_sdp_datamodels.calibration.calibration_create import (
    create_gaintable_from_visibility,
)
from ska_sdp_datamodels.calibration.calibration_functions import (
    export_gaintable_to_hdf5,
)
from ska_sdp_datamodels.configuration.config_create import (
    create_named_configuration,
)
from ska_sdp_datamodels.science_data_model import PolarisationFrame
from ska_sdp_datamodels.visibility.vis_create import create_visibility
from ska_sdp_datamodels.visibility.vis_io_ms import export_visibility_to_ms

from ska_sdp_instrumental_calibration.data_managers.dask_wrappers import (
    load_ms,
    predict_vis,
    run_solver,
)
from ska_sdp_instrumental_calibration.processing_tasks.calibration import (
    apply_gaintable,
)
from ska_sdp_instrumental_calibration.processing_tasks.lsm_tmp import (
    convert_model_to_skycomponents,
    generate_lsm,
)
from ska_sdp_instrumental_calibration.processing_tasks.predict import (
    predict_from_components,
)

# Suppress FutureWarning messages so that the cell outputs stay readable.
warnings.simplefilter(action="ignore", category=FutureWarning)

# Route INFO-level (and above) messages from the root logger to stdout so
# that they appear in the cell outputs.
log = logging.getLogger()
log.setLevel(logging.INFO)
# Attach the stdout handler only once. Re-running this cell would otherwise
# add one more handler each time and every message would be printed again
# for each extra handler.
if not any(
    isinstance(handler, logging.StreamHandler)
    and getattr(handler, "stream", None) is sys.stdout
    for handler in log.handlers
):
    log.addHandler(logging.StreamHandler(sys.stdout))



In [2]:
# pipeline config

# External inputs: the GLEAM file handed to generate_lsm, plus the
# measurement set and coefficients directory handed to the prediction calls.
_data_root = "/data/EOS_1/mit183"
_sp4626_dir = f"{_data_root}/SKA/SP-4626"

gleamfile = f"{_data_root}/gleamegc.dat"
eb_ms = f"{_sp4626_dir}/OSKAR_MOCK.ms"
eb_coeffs = f"{_sp4626_dir}/ska-sdp-func-everybeam/coeffs"

# Products written by this notebook (paths relative to the working directory).
hdf5_name = "demo.hdf5"
ms_name = "demo.ms"

In [3]:
# Set up a local dask cluster and client

# LocalCluster() is created with its default arguments and the Client is
# connected to it. The client is closed again near the end of the notebook.
cluster = LocalCluster()
client = Client(cluster)

In [4]:
# Create a test Visibility dataset

# -------------------------------------------------------------------------- #
# Set up the array

# Start from the full named array configuration
low_config = create_named_configuration("LOWBD2")

# Down-select to a desired sub-array
#  - ECP-240228 modified AA2 clusters:
#      Southern Arm: S8 (x6), S9, S10 (x6), S13, S15, S16
#      Northern Arm: N8, N9, N10, N13, N15, N16
#      Eastern Arm: E8, E9, E10, E13.
#  - Most include only 4 of 6 stations, so just use the first 4.
#  - Each entry below is (number of the first station, stations to take);
#    1 is subtracted from every number before matching against low_config.id.
aa2_clusters = [
    (345, 6),  # S8-1:6
    (351, 4),  # S9-1:4
    (429, 6),  # S10-1:6
    (447, 4),  # S13-1:4
    (459, 4),  # S15-1:4
    (465, 4),  # S16-1:4
    (375, 4),  # N8-1:4
    (381, 4),  # N9-1:4
    (471, 4),  # N10-1:4
    (489, 4),  # N13-1:4
    (501, 4),  # N15-1:4
    (507, 4),  # N16-1:4
    (315, 4),  # E8-1:4
    (321, 4),  # E9-1:4
    (387, 4),  # E10-1:4
    (405, 4),  # E13-1:4
]
# Expand every cluster into its run of consecutive station numbers
AA2 = (
    np.concatenate(
        [first + np.arange(count) for first, count in aa2_clusters]
    )
    - 1
)

# Keep only the configuration entries whose id is in the AA2 list
nstations = low_config.stations.shape[0]
mask = np.isin(low_config.id.data, AA2)
low_config = low_config.sel(indexers={"id": np.arange(nstations)[mask]})

# Reset relevant station parameters so names and ids run from 0 again
nstations = low_config.stations.shape[0]
station_index = np.arange(nstations)
low_config.stations.data = station_index.astype("str")
low_config = low_config.assign_coords(id=station_index)
# Alternative naming that was considered:
# low_config.attrs["name"] = low_config.name+"-AA2"
low_config.attrs["name"] = "AA2-Low-ECP-240228"

print(f"Using {low_config.name} with {nstations} stations")

# -------------------------------------------------------------------------- #
# Set up the observation

# Phase centre in the ICRS coordinate frame
ra0 = Angle(0.0 * units.hourangle)
dec0 = Angle(-27.0 * units.deg)

# Frequency and time sampling of the simulated observation
# chanwidth = 400e6 / 512  # station/CBF coarse channels = 781.25 kHz
chanwidth = 5.4e3  # Hz
nfrequency = 64
channel_index = np.arange(nfrequency)
frequency = 781.25e3 * 160 + chanwidth * channel_index
sample_time = 0.9  # seconds
solution_interval = sample_time  # would normally be minutes

# Phase centre hour angles for the sim (in radians): start at ha0 and add
# one offset per sample_time step within the solution interval
ha0 = 1 * np.pi / 12  # radians
time_offsets = np.arange(0, solution_interval, sample_time)  # seconds
ha = ha0 + time_offsets / 3600 * np.pi / 12

# Create the Visibility dataset (same bandwidth for every channel)
channel_bandwidth = [chanwidth] * len(frequency)
vis = create_visibility(
    low_config,
    ha,
    frequency,
    channel_bandwidth=channel_bandwidth,
    polarisation_frame=PolarisationFrame("linear"),
    phasecentre=SkyCoord(ra=ra0, dec=dec0),
    weight=1.0,
)

# Generate a true sky model and true visibilities for the whole band.
# fov and flux_limit are reused when the sky model is regenerated later on.
fov = 10.0
flux_limit = 1
tsm = generate_lsm(
    gleamfile=gleamfile,
    phasecentre=vis.phasecentre,
    fov=fov,
    flux_limit=flux_limit,
)

# Convert the model to sky components at the dataset frequencies and predict
tsm_components = convert_model_to_skycomponents(tsm, vis.frequency.data)
predict_from_components(vis, tsm_components, eb_coeffs=eb_coeffs, eb_ms=eb_ms)

# Apply random complex antenna gains
#  - Ignore polarisation for now. Just get basic calibration working.
jones = create_gaintable_from_visibility(
    vis, jones_type="B", timeslice=solution_interval
)

# Draw the gains from a seeded generator so that the simulated corruptions,
# and therefore the exported dataset, are the same on every run.
g_sigma = 0.1
gain_seed = 0
rng = np.random.default_rng(gain_seed)
gain_shape = jones.gain.shape[:3]
# Fill only elements [0, 0] and [1, 1] of the last two axes. Each gain has
# real part ~ N(1, g_sigma) and imaginary part ~ N(0, g_sigma).
for pol in (0, 1):
    jones.gain.data[..., pol, pol] = (
        rng.normal(1, g_sigma, gain_shape)
        + rng.normal(0, g_sigma, gain_shape) * 1j
    )

# Apply the gains to the predicted visibilities (inverse=False)
vis = apply_gaintable(vis=vis, gt=jones, inverse=False)

# Export vis to a file
export_visibility_to_ms(ms_name, [vis])

# Drop the in-memory dataset; the next cell reads it back from ms_name
vis = None

Using AA2-Low-ECP-240228 with 68 stations


2024-12-02 14:14:07,348 - processing_tasks.lsm_tmp - INFO - extracted 37 GLEAM components


extracted 37 GLEAM components
Initialising beams for Low
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could n

In [5]:
# Do pre-processing

# Set the number of channels in each frequency chunk
fchunk = 16

# Read in the Visibility dataset in chunks
vis = load_ms(ms_name, fchunk)

# This triggers an extra load, but is worth checking
# NOTE: create_visibility_from_ms is not among the imports in the first cell;
# import it before enabling this check.
# assert np.all(vis.vis.data == create_visibility_from_ms(ms_name)[0].vis.data)

# Get the LSM (single call for all channels / dask tasks)
# fov and flux_limit are the values set when the test dataset was created.
lsm = generate_lsm(
    gleamfile=gleamfile,
    phasecentre=vis.phasecentre,
    fov=fov,
    flux_limit=flux_limit,
)

# Adaptive RFI flagging
#  - Is triggering the computation as is, so leave it for now.
#  - Move to dask_wrappers? RFI flagging may need bandwidth...
# vis = rfi_flagger(vis)

# Averaging of Visibility datasets in time or frequency.
#  - Presumably use dask parallelism.
#  - Done as part of chunking?

Reading uni. fields [0], uni. data descs [0]


2024-12-02 14:14:14,394 - processing_tasks.lsm_tmp - INFO - extracted 37 GLEAM components


extracted 37 GLEAM components


In [6]:
# Predict model visibilities
# Uses the chunked dataset and the LSM from the pre-processing cell, with the
# same eb_ms / eb_coeffs inputs that were used to create the test dataset.
modelvis = predict_vis(vis, lsm, eb_ms=eb_ms, eb_coeffs=eb_coeffs)

# Make sure modelvis was updated
# This triggers an extra load, but is worth checking
# assert np.all(modelvis.vis.data[..., [0, 3]] != 0), "vis should have changed"

In [7]:
# Do the bandpass calibration

refant = 0

# Solve for the gains and load the result
gaintable = run_solver(vis=vis, modelvis=modelvis, refant=refant).load()

# Do some phase referencing for comparisons: remove the phase found at index
# refant along the second axis from the gains that were applied to the
# test dataset
true_gains = jones.gain.data
refant_phase = np.angle(true_gains[:, [refant], :, :, :])
inputdata = true_gains * np.exp(-1j * refant_phase)

# The solved gains should match the phase-referenced input gains
assert np.allclose(
    gaintable.gain.data, inputdata
), "Calibration should have converged."

print("done!")

  return cls(datavars, coords=coords, attrs=attrs)
  return cls(datavars, coords=coords, attrs=attrs)
  return cls(datavars, coords=coords, attrs=attrs)
  return cls(datavars, coords=coords, attrs=attrs)


Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest 

In [8]:
# Shut down the scheduler and workers
# Close this notebook's client connection first, then stop the LocalCluster
# that was started for it, rather than calling shutdown() on a client that
# has already been closed.
client.close()
cluster.close()

In [9]:
# Output hdf5 file
# Write the solved gain table to hdf5_name (set in the pipeline config cell).
export_gaintable_to_hdf5([gaintable], hdf5_name)