## Jupyter notebook to demonstrate the basic calibration setup

This notebook runs the following tasks:
* Create a test Visibility dataset.
   * No visibility sample noise is added; the aim is only to check that everything works exactly as expected.
   * Using the GLEAM sky model and a common everybeam station beam model.
   * Add complex Gaussian noise corruptions to X and Y station gains.
* Some pre-processing.
* Predict model visibilities (using GLEAM and everybeam).
* Do bandpass calibration.

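This notebook requires:
* The packages imported in the first code cell (see imports).
* Local copies of the GLEAM catalogue, the OSKAR mock MeasurementSet and the everybeam coefficient files, at the hard-coded paths used in the simulation cell.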

In [1]:
# Demonstration of basic calibration

# pylint cannot seem to handle astropy units
# pylint: disable=no-member

import importlib

# Imports
import warnings

import numpy as np
import xarray
from astropy import units
from astropy.coordinates import Angle, SkyCoord

# from ska_sdp_func_python.calibration.operations import apply_gaintable
from ska_sdp_datamodels.calibration.calibration_create import (
    create_gaintable_from_visibility,
)
from ska_sdp_datamodels.configuration.config_create import (
    create_named_configuration,
)
from ska_sdp_datamodels.science_data_model import PolarisationFrame
from ska_sdp_datamodels.visibility.vis_create import create_visibility
from ska_sdp_func_python.preprocessing.flagger import rfi_flagger

from ska_sdp_instrumental_calibration.processing_tasks.beams import (
    GenericBeams,
)
from ska_sdp_instrumental_calibration.processing_tasks.calibration import (
    apply_gaintable,
    solve_bandpass,
)
from ska_sdp_instrumental_calibration.processing_tasks.lsm_tmp import (
    convert_model_to_skycomponents,
    generate_lsm,
)
from ska_sdp_instrumental_calibration.processing_tasks.predict import (
    predict_from_components,
)

warnings.simplefilter(action="ignore", category=FutureWarning)



In [2]:
# Helper functions (might move some to package)


def get_slice_lims(length, nslice):
    """Generate a list of slice index limits for n slices.

    Generate indices to slice a list into a number of equal-length slices.
    Allow the final slice to be smaller if need be. The slice length is
    ``ceil(length / nslice)``, so fewer than ``nslice`` slices are returned
    when that is enough to cover ``length`` (e.g. length=5, nslice=4 gives
    3 slices).

    :param length: length of list to be sliced (must be >= 0).
    :param nslice: maximum number of slices to generate (must be >= 1).
    :return: integer array of shape (n, 2). Row i holds the start index
        (inclusive) and stop index (exclusive) of slice i. The array is
        empty, with shape (0, 2), if length is zero.
    :raises ValueError: if nslice is less than 1 or length is negative.
    """
    if nslice < 1:
        raise ValueError(f"nslice must be at least 1, got {nslice}")
    if length < 0:
        raise ValueError(f"length must be non-negative, got {length}")
    if length == 0:
        # The step below would be zero, which np.arange cannot handle
        return np.empty((0, 2), dtype=int)

    # Start index of each slice, then pair each start with the next start
    # (or with length for the final slice) to get the stop index
    slice_lim0 = np.arange(0, length, int(np.ceil(length / nslice)))
    return np.stack((slice_lim0, np.append(slice_lim0[1:], length))).T

In [3]:
# Create a test Visibility dataset

# -------------------------------------------------------------------------- #
# Set up the array

# Start from the named "LOWBD2" array configuration
low_config = create_named_configuration("LOWBD2")

# Down-select to a desired sub-array
#  - ECP-240228 modified AA2 clusters:
#      Southern Arm: S8 (x6), S9, S10 (x6), S13, S15, S16
#      Northern Arm: N8, N9, N10, N13, N15, N16
#      Eastern Arm: E8, E9, E10, E13.
#  - Most include only 4 of 6 stations, so just use the first 4.
#  - Each entry is (first station number, number of stations to keep).
aa2_clusters = (
    (345, 6),  # S8-1:6
    (351, 4),  # S9-1:4
    (429, 6),  # S10-1:6
    (447, 4),  # S13-1:4
    (459, 4),  # S15-1:4
    (465, 4),  # S16-1:4
    (375, 4),  # N8-1:4
    (381, 4),  # N9-1:4
    (471, 4),  # N10-1:4
    (489, 4),  # N13-1:4
    (501, 4),  # N15-1:4
    (507, 4),  # N16-1:4
    (315, 4),  # E8-1:4
    (321, 4),  # E9-1:4
    (387, 4),  # E10-1:4
    (405, 4),  # E13-1:4
)
# The station numbers are shifted down by one before being matched against
# the configuration IDs (presumably 1-based numbers vs 0-based IDs).
AA2 = (
    np.concatenate(
        [first + np.arange(count) for first, count in aa2_clusters]
    )
    - 1
)

# Keep only the configuration entries whose ID is in the AA2 list
nstations = low_config.stations.shape[0]
mask = np.isin(low_config.id.data, AA2)
low_config = low_config.sel(indexers={"id": np.arange(nstations)[mask]})

# Renumber the remaining stations from zero and rename the configuration
nstations = low_config.stations.shape[0]
station_ids = np.arange(nstations)
low_config.stations.data = station_ids.astype("str")
low_config = low_config.assign_coords(id=station_ids)
low_config.attrs["name"] = "AA2-Low-ECP-240228"

print(f"Using {low_config.name} with {nstations} stations")

# -------------------------------------------------------------------------- #
# Set up the observation

# Phase centre in the ICRS coordinate frame
ra0 = Angle(0.0 * units.hourangle)
dec0 = Angle(-27.0 * units.deg)
phasecentre = SkyCoord(ra=ra0, dec=dec0)

# Spectral and temporal sampling
#  - station/CBF coarse channels would be 400e6 / 512 = 781.25 kHz wide;
#    much narrower channels are used here.
#  - the band starts at 160 x 781.25 kHz = 125 MHz.
chanwidth = 5.4e3  # Hz
nfrequency = 64
freq_start = 781.25e3 * 160
frequency = freq_start + chanwidth * np.arange(nfrequency)
sample_time = 0.9  # seconds
solution_interval = sample_time  # would normally be minutes

# Phase centre hour angles covered by the sim (in radians)
#  - sample offsets in seconds are scaled by 1/3600 and then by pi/12
ha0 = np.pi / 12  # radians
sample_offsets = np.arange(0, solution_interval, sample_time)
ha = ha0 + sample_offsets / 3600 * np.pi / 12

# Create the Visibility dataset
vis = create_visibility(
    low_config,
    ha,
    frequency,
    phasecentre=phasecentre,
    polarisation_frame=PolarisationFrame("linear"),
    channel_bandwidth=[chanwidth] * nfrequency,
    weight=1.0,
)

# Generate a true sky model and true visibilities for the whole band
#  - NOTE(review): gleamfile, ms_path and EVERYBEAM_DATADIR below are absolute
#    paths on one particular machine; they must be edited to run elsewhere.
gleamfile = "/data/EOS_1/mit183/gleamegc.dat"
# Sky model selection limits passed to generate_lsm
#  - presumably fov is in degrees and flux_limit in Jy -- TODO confirm
fov = 10.0
flux_limit = 1
tsm = generate_lsm(
    gleamfile=gleamfile,
    phasecentre=vis.phasecentre,
    fov=fov,
    flux_limit=flux_limit,
)
ms_path = "/data/EOS_1/mit183/SKA/SP-4626/OSKAR_MOCK.ms"
# IPython magic: set the environment variable for this kernel before the beam
# model is created (presumably read by everybeam -- confirm).
%env EVERYBEAM_DATADIR=/data/EOS_1/mit183/SKA/SP-4626/ska-sdp-func-everybeam/coeffs
beams = GenericBeams(vis=vis, array="Low", ms_path=ms_path)

# Convert the sky model to components at the Visibility frequencies and
# predict them into vis. The return value is not used here, so vis is
# presumably updated in place.
#  - NOTE(review): freq0 is left at its default here, whereas the model
#    prediction later in the notebook passes freq0=200e6 -- confirm that the
#    two are consistent.
tsm_components = convert_model_to_skycomponents(tsm, vis.frequency.data)
predict_from_components(vis, tsm_components, beams=beams)

# Possible future development:
#  - Add thermal noise.
#  - Use the GSM package.
#  - Image-based sky models with degridding?
#  - Generate an ionospheric phase screen and add direction-dependent delays.
#  - Use the phase screen to add differential ionospheric Faraday rotation.

# Apply random complex antenna gains
#  - Ignore polarisation for now. Just get basic calibration working.
#  - Use a seeded generator so that the corruptions, and therefore the final
#    comparison of solved gains against these inputs, are reproducible.
jones = create_gaintable_from_visibility(
    vis, jones_type="B", timeslice=solution_interval
)
g_sigma = 0.1
rng = np.random.default_rng(seed=12345)

# One independent draw per element of the first three axes of the gain array,
# i.e. everything except the trailing two (Jones matrix) axes.
gain_shape = jones.gain.shape[:3]

# Only the diagonal (0, 0) and (1, 1) Jones elements are perturbed: real
# parts are scattered about 1 and imaginary parts about 0, both with
# standard deviation g_sigma.
for pol in range(2):
    jones.gain.data[..., pol, pol] = rng.normal(
        1, g_sigma, gain_shape
    ) + 1j * rng.normal(0, g_sigma, gain_shape)

# Corrupt the predicted visibilities with the random gains
vis = apply_gaintable(vis=vis, gt=jones, inverse=False)

Using AA2-Low-ECP-240228 with 68 stations


2024-11-13 16:43:18,342 - ska_sdp_instrumental_calibration.processing_tasks.lsm_tmp - INFO - extracted 37 GLEAM components


env: EVERYBEAM_DATADIR=/data/EOS_1/mit183/SKA/SP-4626/ska-sdp-func-everybeam/coeffs
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with freque

In [4]:
# Do pre-processing

# Get the LSM (single call for all channels / dask tasks)
#  - Uses the same catalogue file, phase centre, field of view and flux limit
#    as the "true" sky model generated in the simulation cell.
lsm = generate_lsm(
    gleamfile=gleamfile,
    phasecentre=vis.phasecentre,
    fov=fov,
    flux_limit=flux_limit,
)

# Set the environment variable and build the beam model again. This repeats
# the set-up from the simulation cell with identical arguments.
#  - NOTE(review): the path is machine-specific and must be edited to run
#    elsewhere.
%env EVERYBEAM_DATADIR=/data/EOS_1/mit183/SKA/SP-4626/ska-sdp-func-everybeam/coeffs
beams = GenericBeams(vis=vis, array="Low", ms_path=ms_path)

# Adaptive RFI flagging (assume known flags/birdies have been applied)
#  - Could also use dask parallelism.
#  - Should have already been done in the batch pre-processing pipeline
# This works but requires ska_sdp_func:
#  - importlib.util is a submodule, and a bare "import importlib" does not
#    guarantee that it is loaded, so import it explicitly before use.
import importlib.util

if importlib.util.find_spec("ska_sdp_func") is not None:
    print("Calling ska_sdp_func_python preprocessing flagger rfi_flagger")
    vis = rfi_flagger(vis)
else:
    print("ska_sdp_func is not available for rfi flagging")

# Chunking of Visibility dataset in frequency

# Set a number of "parallel dask tasks" to divide calibration between
#  - There is no dask. Each "task" is done sequentially.
#  - Used as the number of frequency sub-bands in the prediction and
#    calibration loops.
ndasktask = 4

# Averaging of Visibility datasets in time or frequency.
#  - Presumably use dask parallelism.
#  - Done as part of chunking?

2024-11-13 16:43:23,416 - ska_sdp_instrumental_calibration.processing_tasks.lsm_tmp - INFO - extracted 37 GLEAM components


env: EVERYBEAM_DATADIR=/data/EOS_1/mit183/SKA/SP-4626/ska-sdp-func-everybeam/coeffs
Calling ska_sdp_func_python preprocessing flagger rfi_flagger


In [5]:
# Predict model visibilities

# Could do this inside the bandpass calibration area, parallel by subband.
# But keep it separate to test that:
#  - it can be done in another workflow (e.g. in pre-processing)
#  - accelerators can be used

# Create an empty model Visibility dataset
#  - Is it better to generate separate sub-band datasets and then concatenate?
#     - It is presumably faster to allocate the xarray sub-bands in parallel.
#     - If they have frequency chunking anyway, perhaps concat is efficient...
#  - Want frequency chunking, so copy rather than calling create_visibility?
#     - Seems reasonable, but may want to duplicate pre-proc time and
#       frequency averaging if there is appreciable decorrelation. In which
#       case this would need to be done before pre-processing.
modelvis = vis.assign({"vis": xarray.zeros_like(vis.vis)})
assert np.all(modelvis.vis.data == 0)

full_freq = modelvis.frequency.data
print(
    f"full spectral range: {np.min(full_freq)/1e6:.4f} - "
    f"{np.max(full_freq)/1e6:.4f} MHz"
)

# Running total of channels handled, checked against the full band below
nchan = 0
for dasktask, slice_lims in enumerate(get_slice_lims(nfrequency, ndasktask)):
    # Create an xarray view for the current sub-band
    bandmodel = modelvis.isel(frequency=slice(*slice_lims))
    band_freq = bandmodel.frequency.data
    band_nchan = len(bandmodel.frequency)
    nchan += band_nchan
    print(
        f"band spectral range: {np.min(band_freq)/1e6:.4f} - "
        f"{np.max(band_freq)/1e6:.4f} MHz "
        f"(dask task {dasktask}, nchan = {band_nchan})"
    )

    # Convert the local sky model to components at the sub-band frequencies
    # and predict them into the sub-band view
    lsm_components = convert_model_to_skycomponents(
        lsm, band_freq, freq0=200e6
    )
    bandmodel = predict_from_components(bandmodel, lsm_components, beams=beams)

# Every channel of the full band must have been covered exactly once
assert nchan == len(modelvis.frequency)

# Make sure modelvis was updated
#  - i.e. ensure that the isel in predict used reference semantics
assert np.any(
    modelvis.vis.data != 0
), "bandmodel view updates should have changed this"

full spectral range: 125.0000 - 125.3402 MHz
band spectral range: 125.0000 - 125.0810 MHz (dask task 0, nchan = 16)
band spectral range: 125.0864 - 125.1674 MHz (dask task 1, nchan = 16)
band spectral range: 125.1728 - 125.2538 MHz (dask task 2, nchan = 16)
band spectral range: 125.2592 - 125.3402 MHz (dask task 3, nchan = 16)


In [6]:
# Do the bandpass calibration

# Create a full-band bandpass calibration gain table
#  - It may be easier to do this in each sub-band then concatenate...
gaintable = create_gaintable_from_visibility(
    vis, jones_type="B", timeslice=solution_interval
)

# A single solution interval is expected
assert len(gaintable.time) == 1

refant = 0

for dasktask, slice_lims in enumerate(get_slice_lims(nfrequency, ndasktask)):
    # Create xarray views for the current sub-band
    #  - The same channel range is used for the data, the model and the
    #    gain table. The solver allows for each task to also combine the data
    #    into a single solution for all channels, to generate solutions at
    #    the task sub-band resolution. Would need to generalise this
    #    selection though.
    chan_slice = slice(*slice_lims)
    bandvis = vis.isel(frequency=chan_slice)
    bandmodel = modelvis.isel(frequency=chan_slice)
    bandtable = gaintable.isel(frequency=chan_slice)

    band_freq = bandmodel.frequency.data
    print(
        f"band spectral range: {np.min(band_freq)/1e6:.4f} - "
        f"{np.max(band_freq)/1e6:.4f} MHz "
        f"(dask task {dasktask}, nchan = {len(bandmodel.frequency)})"
    )

    # Call bandpass calibration function for this sub-band.
    #  - bandtable should either have a single solution spectral channel or
    #    one for each input channel. Here it is the latter.
    #  - Internally, the bandpass calibration function might call solvers
    #    multiple times.
    #  - The bandpass calibration function should return a list of bad
    #    antennas and bad channels, as well as other QA data.
    #  - The return value is not used here; the comparison below relies on
    #    the full-band gaintable having been updated through the view.
    solve_bandpass(
        vis=bandvis,
        modelvis=bandmodel,
        gain_table=bandtable,
        refant=refant,
    )

# Do some phase referencing for comparisons
#  - rotate the input gains by the phase of the reference antenna entry
ref_phase = np.angle(jones.gain.data[:, [refant], :, :, :])
inputdata = jones.gain.data * np.exp(-1j * ref_phase)

assert np.allclose(gaintable.gain.data, inputdata)

band spectral range: 125.0000 - 125.0810 MHz (dask task 0, nchan = 16)
band spectral range: 125.0864 - 125.1674 MHz (dask task 1, nchan = 16)
band spectral range: 125.1728 - 125.2538 MHz (dask task 2, nchan = 16)
band spectral range: 125.2592 - 125.3402 MHz (dask task 3, nchan = 16)


In [7]:
# Do post-processing

# Do any required interpolation.

# Estimate delays using the full GainTable (could distribute over antennas)

# Estimate differential Faraday rotation using the full GainTable (could
# distribute over antennas)

# Generate QA and flagging information