# Validate Predict Stage

In [1]:
from ska_sdp_instrumental_calibration.workflow.stages import (
    load_data_stage,
    predict_vis_stage,
)
from ska_sdp_datamodels.visibility.vis_io_ms import export_visibility_to_ms
from ska_sdp_instrumental_calibration.scheduler import UpstreamOutput
import matplotlib.pyplot as plt
import os

  cls = super().__new__(mcls, name, bases, namespace, **kwargs)


### Data Generation

The input data was simulated using OSKAR. The simulation scripts are located in `scripts/ska_low_sim` (refer to the [Confluence page](https://confluence.skatelescope.org/display/SE/DHR-311%3A+Script+to+simulate+SKA-LOW+visibilities)).

#### Simulation Configuration

The following configuration is used for the simulation (hereafter referred to as `custom_sim.yaml`).

``` yaml
scenario: "low40s-model"         

# ===============================
# Global simulation parameters
# ===============================

n_stations: 40                                         # Number of stations
tel_model: "./telescope-models/SKA-Low_AA2_40S_rigid-rotation_model.tm" # Telescope model directory

simulation_start_frequency_hz: 123.0e6                  # Start frequency (Hz)
simulation_end_frequency_hz: 153.0e6                    # End frequency (Hz)
correlated_channel_bandwidth_hz: 21.70138888888889e3    # Channel width (Hz)

observing_time_mins: 10                              # Observation duration (minutes)
sampling_time_sec: 3.3973862400000003                   # Dump/integration time (seconds)

fields:
  EoR2:
    Cal1:
      ra_deg: 197.914612
      dec_deg: -22.277973
      scan_id_start: 300
      transit_time: "2000-01-03 22:33:30.000"

# ===============================
# Options for run_sim.py
# ===============================

run_sim:
  oskar_sif: "./OSKAR-2.11.1-Python3.sif" # Path to OSKAR Singularity image

  # GLEAM sky model.
  gleam_file: "./sky-models/GLEAM_EGC.fits" # GLEAM catalogue FITS file
  field_radius_deg: 10.0            # Radius of field of view (degrees)

  # Imaging parameters using wsclean. Optional. Comment to disable.
  create_dirty_image: true          # Whether to run wsclean imaging
  image_size: 1024                  # Image size (pixels)
  pixel_size: "2arcsec"             # Pixel size (angular units)

  # Extra parameters to pass directly to run_oskar.py
  run_oskar_extra_params: "--use-gpus --double-precision"
```

Follow the steps described on the Confluence page to set up the data and the environment. Then run the following command to simulate the visibilities:
`python run_sim.py custom_sim.yaml`

This simulation does not include any corruptions. With the above configuration, the simulation script also images the simulated visibilities; these images can be used for further verification.


In [2]:
## Setup

# Measurement Set produced by the simulation described above.
input_data = "../datasets/visibility.scan-300.ms/"

# Directory holding the cached visibilities. The default is the original
# machine-specific location; set INST_CAL_CACHE_DIR to run the notebook on
# another machine without editing this cell.
cache = os.environ.get(
    "INST_CAL_CACHE_DIR",
    "/home/maniaddhanki/workspace/SKAO/ska-sdp-instrumental-calibration/cach",
)

# All exported measurement sets, plots and images are written below this path.
artifacts_prefix_path = "./predict_artifacts/"
# Create it up front so a fresh "Restart & Run All" does not fail on the first write.
os.makedirs(artifacts_prefix_path, exist_ok=True)

# Shared container that the stage definitions below receive as first argument.
upstream_output = UpstreamOutput()

In [3]:
## Load data
#
# Runs the load-data stage on `input_data`. The arguments are passed
# positionally, so the order of the names below must match the signature of
# `load_data_stage.stage_definition` (not visible in this notebook).

# Chunk sizes along the frequency and time axes.
nchannels_per_chunk = 32
ntimes_per_chunk = 5
# Visibilities are cached under this directory (see the log line printed by the stage).
cache_directory = cache
ack = False
# presumably the Measurement Set column to read -- confirm against the stage docs
datacolumn = "DATA"
field_id = 0
data_desc_id = 0
# The stage receives the input path through a CLI-style argument mapping.
_cli_args_ = {"input": input_data}
_output_dir_ = artifacts_prefix_path

load_data_stage.stage_definition(
    upstream_output,
    nchannels_per_chunk,
    ntimes_per_chunk,
    cache_directory,
    ack,
    datacolumn,
    field_id,
    data_desc_id,
    _cli_args_,
    _output_dir_,
)

1|2025-10-09T04:01:26.299Z|INFO|MainThread|load_data_stage|load_data.py#181||Reading cached visibilities from path /home/maniaddhanki/workspace/SKAO/ska-sdp-instrumental-calibration/cach/visibility.scan-300.ms_fid0_ddid0


<ska_sdp_instrumental_calibration.scheduler.UpstreamOutput at 0x7b1c406ce320>

In [None]:
## prediction
#
# Runs the predict-visibilities stage. The arguments are passed positionally,
# so the order of the names below must match the signature of
# `predict_vis_stage.stage_definition` (not visible in this notebook).
# NOTE(review): `eb_coeffs` and `lsm_csv_path` are absolute, machine-specific
# paths; adjust them before running on another machine.

beam_type = "everybeam"
normalise_at_beam_center = True
eb_ms = None
eb_coeffs = "/home/ska/Work/data/INST/sim/coeffs"
# No GLEAM file is passed; the sky model comes from `lsm_csv_path` instead
# (presumably the two are alternatives -- confirm against the stage docs).
gleamfile = None  # "/home/maniaddhanki/workspace/SKAO/datasets/sky_models/gleamegc.dat"
lsm_csv_path = "/home/ska/Work/data/INST/lg3/sky_model_cal.csv"
# Same value as `field_radius_deg` in the simulation configuration.
fov = 10.0
flux_limit = 1.0
alpha0 = -0.78
_cli_args_ = {"input": input_data}

predict_vis_stage.stage_definition(
    upstream_output,
    beam_type,
    normalise_at_beam_center,
    eb_ms,
    eb_coeffs,
    gleamfile,
    lsm_csv_path,
    fov,
    flux_limit,
    alpha0,
    _cli_args_,
)

In [5]:
## computation

# Evaluate the loaded and the predicted visibilities (input first, then
# model) so the plotting and export cells below work on concrete data.
input_vis, model_vis = (
    upstream_output.vis.compute(),
    upstream_output.modelvis.compute(),
)

In [6]:
## Export Model visibilities

# Write the predicted model visibilities to a Measurement Set inside the
# artifacts directory; the imaging cell at the end of the notebook images
# this file.
ms_path = os.path.join(artifacts_prefix_path, "inst_predicted_model.ms")
export_visibility_to_ms(ms_path, [model_vis])

In [7]:
## Amp vs UV wave


def get_uv_wave(uvw, frequency):
    """Return the projected (u, v) baseline length in units of wavelength.

    Parameters
    ----------
    uvw
        Baseline coordinates that support
        ``transpose("spatial", "time", "baselineid")``. The first two entries
        along ``spatial`` are used as u and v (presumably in metres --
        confirm against the visibility data model).
    frequency
        Frequency or frequencies used to derive the wavelength (presumably
        in Hz -- confirm against the visibility data model).

    Returns
    -------
    ``sqrt(u**2 + v**2) / wavelength`` with ``wavelength = c / frequency``.
    """
    # Exact SI value of the speed of light in m/s rather than the 3e8 rounding.
    speed_of_light = 299_792_458.0
    wavelength = speed_of_light / frequency
    uvw_t = uvw.transpose("spatial", "time", "baselineid")
    u, v = uvw_t[0], uvw_t[1]
    return ((u**2 + v**2) ** 0.5) / wavelength


def _scatter_amp_vs_uv_wave(ax, vis, title):
    """Scatter the amplitude of the first polarisation against uv distance on ``ax``."""
    flat_dims = ("time", "baselineid", "frequency")

    ax.set_ylim(0, 100)
    ax.set_title(title)
    ax.set_xlabel("UVwave (λ)")
    ax.set_ylabel("amp")
    ax.scatter(
        # Both arrays are flattened over the same dimensions in the same
        # order so that the points pair up one to one.
        abs(get_uv_wave(vis.uvw, vis.frequency).stack(flatted_dim=flat_dims)),
        abs(vis.vis.isel(polarisation=0).stack(flatted_dim=flat_dims)),
        s=1.0,
    )


def plot_amp_uv_wave(input_vis, model_vis, prefix_path):
    """Save side-by-side amplitude vs uv-distance scatter plots.

    The left panel shows ``input_vis`` and the right panel shows
    ``model_vis``; both use the first polarisation and a fixed amplitude
    range of 0 to 100.

    Parameters
    ----------
    input_vis
        Input visibilities; must provide ``uvw``, ``frequency`` and ``vis``.
    model_vis
        Predicted model visibilities with the same structure.
    prefix_path
        Directory in which ``amp-uvwave.png`` is written.
    """
    fig = plt.figure(layout="constrained", figsize=(10, 5))
    fig.suptitle("Amp vs UVWave", fontsize=16)
    input_ax, model_ax = fig.subplots(1, 2)

    _scatter_amp_vs_uv_wave(input_ax, input_vis, "Input visibilities")
    _scatter_amp_vs_uv_wave(
        model_ax, model_vis, "Inst Predicted Model visibilitites"
    )

    fig.savefig(f"{prefix_path}/amp-uvwave.png")
    # The figure is only needed on disk; close it to release its memory.
    plt.close(fig)


def plot_uv_waves(input_vis, model_vis, prefix_path):
    """Backward-compatible alias of :func:`plot_amp_uv_wave`."""
    plot_amp_uv_wave(input_vis, model_vis, prefix_path)

In [8]:
## Plotting Amp vs uv wave

# Compare input and predicted model visibilities; output goes to the artifacts directory.
plot_amp_uv_wave(
    input_vis=input_vis,
    model_vis=model_vis,
    prefix_path=artifacts_prefix_path,
)

In [None]:
## Amp vs Channel


def plot_amp_freq(
    model_vis, time_step, start_baseline, end_baseline, prefix_path
):
    """Save amplitude vs channel plots for a range of baselines.

    One line per baseline is drawn for polarisation index 0 (left panel,
    titled XX) and polarisation index 3 (right panel, titled YY), and the
    figure is written to ``amp-freq.png``.

    Parameters
    ----------
    model_vis
        Visibilities to plot; must provide ``vis`` and ``baselineid``.
    time_step
        Index along the ``time`` dimension to plot.
    start_baseline, end_baseline
        Half-open range ``[start_baseline, end_baseline)`` of indices along
        the ``baselineid`` dimension.
    prefix_path
        Directory in which ``amp-freq.png`` is written.
    """
    fig = plt.figure(layout="constrained", figsize=(10, 5))
    fig.suptitle("Amp vs Frequency", fontsize=16)
    xx_ax, yy_ax = fig.subplots(1, 2)

    # The panels show the visibilities passed in as ``model_vis``.
    for ax, title in ((xx_ax, "Model XX"), (yy_ax, "Model YY")):
        ax.set_title(title)
        ax.set_xlabel("Channel")
        ax.set_ylabel("Amp")

    # Take the labels from the dataset being plotted, not from a notebook global.
    baselines = model_vis.baselineid.values

    for i in range(start_baseline, end_baseline):
        # Honour the requested time step instead of always using the first one.
        baseline_vis = model_vis.vis.isel(time=time_step, baselineid=i)
        xx_ax.plot(abs(baseline_vis.isel(polarisation=0)), label=baselines[i])
        yy_ax.plot(abs(baseline_vis.isel(polarisation=3)), label=baselines[i])

    # Both panels carry the same baselines, so one shared legend is enough.
    handles, labels = xx_ax.get_legend_handles_labels()
    fig.legend(handles, labels, title="Baselines", loc="outside center right")
    fig.savefig(f"{prefix_path}/amp-freq.png")

    plt.close(fig)

In [None]:
## Plotting Amp vs channel for model vis
plot_amp_freq(
    model_vis=model_vis,
    time_step=10,
    start_baseline=40,
    end_baseline=50,
    prefix_path=artifacts_prefix_path,
)

In [None]:
## Imaging

### Input visibilities need beam correction before imaging. DP3 applies the
### beam correction and wsclean then images the result; these are the same
### steps the simulation script performs.

# Temporary beam-corrected copy of the input Measurement Set (removed at the end of this cell).
corrected_ms = f"{artifacts_prefix_path}/visibility.scan-300.beamcor.ms"
!DP3 msin=$input_data steps=[applybeam] msout=$corrected_ms

# Same values as `image_size` and `pixel_size` in the simulation configuration.
# NOTE(review): `cell_size` is passed to wsclean's `-size`, so it holds the
# image size in pixels rather than a cell size.
cell_size = 1024
pixel_size = "2arcsec"
input_image_name = f"{artifacts_prefix_path}/input"
model_image_name = f"{artifacts_prefix_path}/inst_predicted_model"
# `-niter 0` requests no deconvolution iterations (presumably yielding dirty images only).
!wsclean -size $cell_size $cell_size -scale $pixel_size -niter 0 -name $input_image_name $corrected_ms
# Image the model Measurement Set written by the export cell.
!wsclean -size $cell_size $cell_size -scale $pixel_size -niter 0 -name $model_image_name "$artifacts_prefix_path/inst_predicted_model.ms"

# Clean up the temporary beam-corrected Measurement Set.
!rm -rf $corrected_ms