## Jupyter notebook to demonstrate the basic calibration setup

This notebook runs the following tasks:
* Create a test Visibility dataset.
   * Not adding visibility sample noise. Just testing that things are working exactly as expected.
   * Using the GLEAM sky model and a common everybeam station beam model.
   * Add complex Gaussian noise corruptions to X and Y station gains.
* Some pre-processing.
* Predict model visibilities (using GLEAM and everybeam).
* Do bandpass calibration.
* Use dask to handle processing of different frequency sub-bands.

This notebook requires:
* The packages imported in the first code cell (see imports).

#### Note: a few lines in the following xarray files need to be commented out for this notebook to run!
* xarray/core/parallel.py: line 194
* xarray/core/indexes.py: conditional starting at line 1400

In [1]:
# Demonstration of basic calibration

# pylint cannot seem to handle astropy units
# pylint: disable=no-member

# Imports
import importlib
import importlib.util
import warnings

import numpy as np
import xarray as xr
from astropy import units
from astropy.coordinates import Angle, SkyCoord

# from ska_sdp_func_python.calibration.operations import apply_gaintable
from ska_sdp_datamodels.calibration.calibration_create import (
    create_gaintable_from_visibility,
)
from ska_sdp_datamodels.configuration.config_create import (
    create_named_configuration,
)
from ska_sdp_datamodels.science_data_model import PolarisationFrame
from ska_sdp_datamodels.visibility.vis_create import create_visibility
from ska_sdp_func_python.preprocessing.flagger import rfi_flagger

from ska_sdp_instrumental_calibration.processing_tasks.beams import (
    GenericBeams,
)
from ska_sdp_instrumental_calibration.processing_tasks.calibration import (
    apply_gaintable,
    solve_bandpass,
)
from ska_sdp_instrumental_calibration.processing_tasks.lsm_tmp import (
    convert_model_to_skycomponents,
    generate_lsm,
)
from ska_sdp_instrumental_calibration.processing_tasks.predict import (
    predict_from_components,
)

# Install an "ignore" filter for FutureWarning so that these warnings are not
# shown in the cell outputs.
warnings.simplefilter(action="ignore", category=FutureWarning)



In [2]:
# Create a test Visibility dataset

# -------------------------------------------------------------------------- #
# Set up the array

# Read in the array configuration registered under the name "LOWBD2".
# It is down-selected to a sub-array in the statements that follow.
low_config = create_named_configuration("LOWBD2")

# Down-select to a desired sub-array
#  - ECP-240228 modified AA2 clusters:
#      Southern Arm: S8 (x6), S9, S10 (x6), S13, S15, S16
#      Northern Arm: N8, N9, N10, N13, N15, N16
#      Eastern Arm: E8, E9, E10, E13.
#  - Most include only 4 of 6 stations, so just use the first 4.
#  - Each row is (cluster label, number of its first station, stations used).
aa2_clusters = (
    ("S8", 345, 6),
    ("S9", 351, 4),
    ("S10", 429, 6),
    ("S13", 447, 4),
    ("S15", 459, 4),
    ("S16", 465, 4),
    ("N8", 375, 4),
    ("N9", 381, 4),
    ("N10", 471, 4),
    ("N13", 489, 4),
    ("N15", 501, 4),
    ("N16", 507, 4),
    ("E8", 315, 4),
    ("E9", 321, 4),
    ("E10", 387, 4),
    ("E13", 405, 4),
)
# Expand every cluster into a run of consecutive station numbers, join the
# runs in table order and subtract one from each number.
AA2 = (
    np.concatenate(
        [first + np.arange(count) for _, first, count in aa2_clusters]
    )
    - 1
)
# Keep only the stations whose id appears in the AA2 list. The ids passed to
# sel are the positions at which the mask is set.
mask = np.isin(low_config.id.data, AA2)
low_config = low_config.sel(indexers={"id": np.flatnonzero(mask)})

# Reset relevant station parameters: relabel the remaining stations and
# their ids as 0..nstations-1 and give the configuration its own name.
nstations = low_config.stations.shape[0]
station_index = np.arange(nstations)
low_config.stations.data = station_index.astype("str")
low_config = low_config.assign_coords(id=station_index)
low_config.attrs["name"] = "AA2-Low-ECP-240228"

print(f"Using {low_config.name} with {nstations} stations")

# -------------------------------------------------------------------------- #
# Set up the observation

# Phase centre in the ICRS coordinate frame
ra0 = Angle(0.0 * units.hourangle)
dec0 = Angle(-27.0 * units.deg)
phasecentre = SkyCoord(ra=ra0, dec=dec0)

# Frequency and time sampling of the simulated data
# chanwidth = 400e6 / 512  # station/CBF coarse channels = 781.25 kHz
nfrequency = 64
chanwidth = 5.4e3  # Hz
frequency = 781.25e3 * 160 + chanwidth * np.arange(nfrequency)
sample_time = 0.9  # seconds
solution_interval = sample_time  # would normally be minutes

# Phase centre hour angles for the sim (in radians): one sample every
# sample_time seconds over a single solution interval, starting at ha0
ha0 = 1 * np.pi / 12  # radians
sample_offsets = np.arange(0, solution_interval, sample_time)  # seconds
ha = ha0 + sample_offsets / 3600 * np.pi / 12

# Create the Visibility dataset (one bandwidth entry per channel)
vis = create_visibility(
    low_config,
    ha,
    frequency,
    channel_bandwidth=[chanwidth] * nfrequency,
    polarisation_frame=PolarisationFrame("linear"),
    phasecentre=phasecentre,
    weight=1.0,
)

# Generate a true sky model and true visibilities for the whole band
# NOTE(review): gleamfile, ms_path and EVERYBEAM_DATADIR below are absolute
# paths on one particular machine and must be edited to run elsewhere.
gleamfile = "/data/EOS_1/mit183/gleamegc.dat"
fov = 10.0
flux_limit = 1
# gleamfile, fov and flux_limit are reused by the pre-processing cell, which
# calls generate_lsm again with the same arguments.
tsm = generate_lsm(
    gleamfile=gleamfile,
    phasecentre=vis.phasecentre,
    fov=fov,
    flux_limit=flux_limit,
)
ms_path = "/data/EOS_1/mit183/SKA/SP-4626/OSKAR_MOCK.ms"
# IPython magic (not plain Python): sets EVERYBEAM_DATADIR in the kernel's
# environment before the beam model is constructed.
%env EVERYBEAM_DATADIR=/data/EOS_1/mit183/SKA/SP-4626/ska-sdp-func-everybeam/coeffs
beams = GenericBeams(vis=vis, array="Low", ms_path=ms_path)

# Convert the sky model to components at the dataset's channel frequencies
# and predict them with the beam model. The return value of the predict call
# is not used, so it presumably fills vis in place -- TODO confirm.
tsm_components = convert_model_to_skycomponents(tsm, vis.frequency.data)
predict_from_components(vis, tsm_components, beams=beams)

# Possible future development:
#  - Add thermal noise.
#  - Add more components.
#     - Use a skycomponents list.
#     - Use the GSM package.
#     - Use a DFT function.
#     - Image-based sky models with degridding?
#  - Include direction-independent gains and delays.
#  - Include beam models.
#  - Generate an ionospheric phase screen and add direction-dependent delays.
#  - Use the phase screen to add differential ionospheric Faraday rotation.

# Apply random complex antenna gains
#  - Ignore polarisation for now. Just get basic calibration working.
jones = create_gaintable_from_visibility(
    vis, jones_type="B", timeslice=solution_interval
)
g_sigma = 0.1

# Draw from a seeded generator rather than the global np.random state, so
# that "Restart Kernel -> Run All" corrupts the data with the same gains (and
# hence exercises the solver on the same problem) on every run.
gain_seed = 20241113
gain_rng = np.random.default_rng(gain_seed)

# One complex draw per element of the leading three gain axes. Only the two
# diagonal entries, (0, 0) and (1, 1), of the trailing 2x2 axes are
# overwritten: real part ~ N(1, g_sigma), imaginary part ~ N(0, g_sigma).
gain_shape = jones.gain.shape[:3]
for pol in (0, 1):
    jones.gain.data[..., pol, pol] = (
        gain_rng.normal(1, g_sigma, gain_shape)
        + gain_rng.normal(0, g_sigma, gain_shape) * 1j
    )

# Corrupt the true visibilities with these gains (inverse=False)
vis = apply_gaintable(vis=vis, gt=jones, inverse=False)

Using AA2-Low-ECP-240228 with 68 stations


2024-11-13 16:52:40,150 - ska_sdp_instrumental_calibration.processing_tasks.lsm_tmp - INFO - extracted 37 GLEAM components


env: EVERYBEAM_DATADIR=/data/EOS_1/mit183/SKA/SP-4626/ska-sdp-func-everybeam/coeffs
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with frequency 137 MHz instead
Could not load dataset for frequency 125 MHz, using the nearest neighbor with freque

In [3]:
# Do pre-processing

# Get the LSM (single call for all channels / dask tasks)
# Uses the same catalogue file, field of view and flux limit that were set
# when the true sky model was generated.
lsm = generate_lsm(
    gleamfile=gleamfile,
    phasecentre=vis.phasecentre,
    fov=fov,
    flux_limit=flux_limit,
)

# IPython magic (not plain Python): sets EVERYBEAM_DATADIR in the kernel's
# environment before the beam model is constructed.
%env EVERYBEAM_DATADIR=/data/EOS_1/mit183/SKA/SP-4626/ska-sdp-func-everybeam/coeffs
# Rebuild the beam model from the current (gain-corrupted) vis dataset
beams = GenericBeams(vis=vis, array="Low", ms_path=ms_path)

# Adaptive RFI flagging (assume known flags/birdies have been applied)
#  - Could also use dask parallelism.
#  - Should have already been done in the batch pre-processing pipeline
# This works but requires ska_sdp_func, so only call the flagger when that
# package can be found.
# The lookup uses the importlib.util submodule, which a bare
# "import importlib" is not guaranteed to load; it is imported explicitly in
# the imports cell.
have_ska_sdp_func = importlib.util.find_spec("ska_sdp_func") is not None
if have_ska_sdp_func:
    print("Calling ska_sdp_func_python preprocessing flagger rfi_flagger")
    vis = rfi_flagger(vis)
else:
    print("ska_sdp_func is not available for rfi flagging")

# Chunking of Visibility dataset in frequency

# Set the (max) number of channels in each frequency chunk
fchunk = 16
frequency_chunks = {"frequency": fchunk}

# Save a deep copy for final checks, taken before vis is re-chunked
vis_numpy = vis.copy(deep=True)

vis = vis.chunk(frequency_chunks)

# Averaging of Visibility datasets in time or frequency.
#  - Presumably use dask parallelism.
#  - Done as part of chunking?

2024-11-13 16:52:44,912 - ska_sdp_instrumental_calibration.processing_tasks.lsm_tmp - INFO - extracted 37 GLEAM components


env: EVERYBEAM_DATADIR=/data/EOS_1/mit183/SKA/SP-4626/ska-sdp-func-everybeam/coeffs
Calling ska_sdp_func_python preprocessing flagger rfi_flagger


In [4]:
def func(ds):
    """Pass-through used to inspect what ``map_blocks`` hands to a function.

    Prints the type of ``ds`` and, when it is an ``xr.Dataset``, the shape of
    its ``frequency``. The input is returned unmodified.
    """
    print("in func, ds type is", type(ds))
    if not isinstance(ds, xr.Dataset):
        return ds
    print(" - freq shape =", ds.frequency.shape)
    return ds

# modelvis = vis.assign({"vis": xr.zeros_like(vis.vis)})
# Build a fresh dataset with the same set-up as the observed one, then chunk
# it in frequency
empty_vis = create_visibility(
    low_config,
    ha,
    frequency,
    channel_bandwidth=[chanwidth] * len(frequency),
    polarisation_frame=PolarisationFrame("linear"),
    phasecentre=SkyCoord(ra=ra0, dec=dec0),
    weight=1.0,
)
modelvis = empty_vis.chunk({"frequency": fchunk})

# A newly created dataset is expected to hold all-zero visibilities
assert np.all(modelvis.vis.data == 0)

print(modelvis)
print("Input freq shape =", modelvis.frequency.shape)
print()

# Send each block through the pass-through function to see what it receives
modelvis = modelvis.map_blocks(func)


<xarray.Visibility> Size: 19MB
Dimensions:            (time: 1, baselines: 2346, frequency: 64,
                        polarisation: 4, spatial: 3)
Coordinates:
  * time               (time) float64 8B 4.453e+09
  * baselines          (baselines) object 19kB MultiIndex
  * antenna1           (baselines) int64 19kB 0 0 0 0 0 0 ... 65 65 65 66 66 67
  * antenna2           (baselines) int64 19kB 0 1 2 3 4 5 ... 65 66 67 66 67 67
  * frequency          (frequency) float64 512B 1.25e+08 1.25e+08 ... 1.253e+08
  * polarisation       (polarisation) <U2 32B 'XX' 'XY' 'YX' 'YY'
  * spatial            (spatial) <U1 12B 'u' 'v' 'w'
Data variables:
    integration_time   (time) float64 8B dask.array<chunksize=(1,), meta=np.ndarray>
    datetime           (time) datetime64[ns] 8B dask.array<chunksize=(1,), meta=np.ndarray>
    vis                (time, baselines, frequency, polarisation) complex128 10MB dask.array<chunksize=(1, 2346, 16, 4), meta=np.ndarray>
    weight             (time, baselines

In [5]:
# Predict model visibilities

# Could do this inside the bandpass calibration area, but do it here so that:
#  - it is available to other calibration workflows,
#  - different task distribution can be used, and
#  - accelerators can be used.

# Create a model Visibility dataset
#  - Is it better to generate separate sub-band datasets and then concatenate?
#     - It is presumably faster to allocate the xarray sub-bands in parallel.
#     - If they have frequency chunking anyway, perhaps concat is efficient...
#  - Want frequency chunking, so copy rather than calling create_visibility?
#     - Seems reasonable, but may want to duplicate pre-proc time and
#       frequency averaging if there is appreciable decorrelation. In which
#       case this would need to be done before pre-processing.

# Same dataset as vis, with the visibility data replaced by zeros
zeroed_vis = xr.zeros_like(vis.vis)
modelvis = vis.assign({"vis": zeroed_vis})
assert np.all(modelvis.vis.data == 0)

def _predict(vischunk, lsm, beams):

    if len(vischunk.frequency) > 0:

        lsm_components = convert_model_to_skycomponents(
            lsm, vischunk.frequency.data, freq0=200e6
        )

        predict_from_components(vischunk, lsm_components, beams=beams)

    return vischunk

# Run the prediction block by block, with the shared sky and beam models
modelvis = modelvis.map_blocks(_predict, args=[lsm, beams])

# Make sure modelvis was updated
#  - i.e. ensure that the isel in predict used reference semantics
#  - This requires every sample to be non-zero, which is stricter than the
#    alternative check:
#    assert not np.all(modelvis.vis.data == 0), "modelvis should have changed"
all_nonzero = np.all(modelvis.vis.data != 0)
assert all_nonzero, "modelvis should have changed"

In [6]:
# Do the bandpass calibration

refant = 0

# Create a full-band bandpass calibration gain table
#  - It may be easier to do this in each sub-band then concatenate...
fullband_gaintable = create_gaintable_from_visibility(
    vis_numpy, jones_type="B", timeslice=solution_interval
)
gaintable = fullband_gaintable.chunk({"frequency": fchunk})

assert len(gaintable.time) == 1

# Add model and gaintable data to the observed vis dataset so a single
# map_blocks call can be made
vis_with_model = vis.assign(modelvis=modelvis.vis)
megaset = vis_with_model.assign(gain=gaintable.gain)

def _solve(vischunk, refant):

    if len(vischunk.frequency) > 0:
        # Set multiple views into the combined dataset to keep the solver happy
        vis = vischunk.drop_vars(
            ["gain", "antenna", "receptor1", "receptor2", "modelvis"]
        )
        modelvis = vischunk.drop_vars(
            ["gain", "antenna", "receptor1", "receptor2", "vis"]
        ).rename({"modelvis": "vis"})
        solution_interval = np.max(vis.time.data) - np.min(vis.time.data)
        # Create a gaintable wrapper for the gain data
        gaintable = create_gaintable_from_visibility(
            vis,
            jones_type="B",
            timeslice=solution_interval,
        )
        gaintable.gain.data = vischunk.gain.data
        # Call the solver
        solve_bandpass(
            vis=vis,
            modelvis=modelvis,
            gain_table=gaintable,
            refant=refant,
        )

    return vischunk

# Solve every frequency block with the same reference antenna
megaset = megaset.map_blocks(_solve, args=[refant])

# This may not be very efficient, but it will do for now
gaintable.gain.data = megaset.gain.data

# Do some phase referencing for comparisons: rotate the input gains by the
# phase of the reference antenna's gains
true_gains = jones.gain.data
refant_phase = np.angle(true_gains[:, [refant], :, :, :])
inputdata = true_gains * np.exp(-1j * refant_phase)

# The solved gains should reproduce the phase-referenced input gains
gains_match = np.all(np.isclose(gaintable.gain.data, inputdata)).compute()
assert gains_match, "Calibration should have converged."

print("done!")

done!


In [7]:
# Do post-processing

# Do any required interpolation.

# Estimate delays using the full GainTable (could distribute over antennas)

# Estimate differential Faraday rotation using the full GainTable (could
# distribute over antennas)

# Generate QA and flagging information