# Line flux existence check

> Check that line fluxes are not measured where they shouldn't be (null spectra, chip gaps, outside the observed wavelength range), and that they are measured where they should be.

In [None]:
# |default_exp diagnostics.line_flux_check

In [None]:
# |export

import numpy as np
import xarray as xr

from qagmire.data import (
    get_lr_l2_stack_files,
    read_class_spec,
    read_class_table,
    read_galaxy_table,
)
from qagmire.quality_assurance import Diagnostics
from qagmire.utilities import parse_line_names

To write checks of the data, we create a subclass of `Diagnostics` and implement the `tests` method.

In [None]:
# |exports


class LineFluxCheck(Diagnostics):
    """Line flux existence check.

    A reproduction of the weaveio [line_flux_check](https://github.com/bamford/QAG/blob/master/diagnostics/line_flux_check.py).

    This tests for the following cases:

    * Do non-null line fluxes appear in completely null spectra?
    * Do non-null line fluxes appear in the blue chip gap?
    * Do non-null line fluxes appear in the red chip gap?
    * Do non-null line fluxes appear outside the observed wavelength range?
    * Do null line fluxes appear in an observed wavelength range?
    """

    @staticmethod
    def _line_wavelengths(
        data: xr.Dataset,  # merged galaxy_table and class_table
    ) -> xr.Dataset:  # the observed wavelength of every potential line
        """Determine the expected observed wavelengths of all potential lines.

        The rest wavelength of each line is parsed from the `LINE` names and
        shifted to the observed frame using the redshift `Z`.
        """
        # only the rest wavelengths are needed; the species names are discarded
        _, line_rest_wl = parse_line_names(data["LINE"])
        line_wl = (1 + data["Z"]) * line_rest_wl
        return line_wl

    @staticmethod
    def _wavelength_boundaries(
        data: xr.Dataset,  # including class_table, providing the rebinned spectra to check
    ) -> tuple[dict, dict]:  # the determined boundaries
        """Determine wavelength boundaries and wavelength gaps of blue and red spectra.

        Where a spectrum is entirely null, the returned gaps and boundaries will also be null.

        Returns two dictionaries, `boundaries` and `gaps`, each containing `low` and `high` entries,
        which are Datasets giving the low and high boundaries and gap edges determined for each spectrum.
        """
        gaps = {}
        boundaries = {}
        # for each arm, (low, high) bracket the wavelength window in which
        # null pixels are attributed to the chip gap
        for band, low, high in (("B", 4000, 6000), ("R", 6000, 9000)):
            wl_dim = f"LAMBDA_{band}"
            wl = data[wl_dim]
            null_flux = data[f"FLUX_RR_{band}"].isnull()
            # wavelengths of null pixels inside the gap search window
            wl_null = wl.where(null_flux & (wl > low) & (wl < high))
            # wavelengths of all pixels with valid flux
            wl_not_null = wl.where(~null_flux)
            # suppress warnings raised when reducing over all-NaN slices
            with np.errstate(invalid="ignore"):
                gaps[band] = {
                    "low": wl_null.min(dim=wl_dim),
                    "high": wl_null.max(dim=wl_dim),
                }
                boundaries[band] = {
                    "low": wl_not_null.min(dim=wl_dim),
                    "high": wl_not_null.max(dim=wl_dim),
                }
        return boundaries, gaps

    def tests(self, **kwargs):
        """Build the list of line flux existence tests.

        The keyword arguments are passed to `get_lr_l2_stack_files` to select
        the files to check. Returns a list of dictionaries, each with a `name`,
        a `description` and a boolean `test` array that is True where the
        test fails.
        """
        lr_l2_stack_files = get_lr_l2_stack_files(**kwargs)

        data = xr.merge(
            (
                read_class_spec(lr_l2_stack_files),
                read_galaxy_table(lr_l2_stack_files),
                read_class_table(lr_l2_stack_files),
            )
        )

        # perform the tests by OBID, rather than filename
        data = data.swap_dims(filename="OBID")

        line_wl = self._line_wavelengths(data)
        boundaries, gaps = self._wavelength_boundaries(data)

        measured_line_flux = data["LINES"].sel(QTY="FLUX", drop=True)
        null_flux = measured_line_flux.isnull()

        is_in_red_gap = (line_wl > gaps["R"]["low"]) & (line_wl < gaps["R"]["high"])
        is_in_blue_gap = (line_wl > gaps["B"]["low"]) & (line_wl < gaps["B"]["high"])

        # ignore gaps in completely null spectra
        is_in_red_gap = is_in_red_gap.fillna(False)
        is_in_blue_gap = is_in_blue_gap.fillna(False)

        is_in_gap = is_in_blue_gap | is_in_red_gap

        # a line is off the spectrum only if it falls outside both arms
        is_off_spectrum = (
            (line_wl < boundaries["B"]["low"]) | (line_wl > boundaries["B"]["high"])
        ) & ((line_wl < boundaries["R"]["low"]) | (line_wl > boundaries["R"]["high"]))

        is_on_spectrum = ~is_in_gap & ~is_off_spectrum

        # ignore whether on/off spectrum for completely null spectra
        # NOTE(review): the comparisons above appear to yield plain booleans
        # (NaN compares as False), so these fillna calls may be no-ops —
        # confirm whether null spectra need explicit masking here.
        is_off_spectrum = is_off_spectrum.fillna(False)
        # fill the on-spectrum mask itself; it must not be replaced by the
        # blue-gap mask, otherwise `null_line_on_spectrum` tests the wrong region
        is_on_spectrum = is_on_spectrum.fillna(False)

        # a spectrum is treated as null if either arm has no valid pixels
        null_spectrum = (
            boundaries["B"]["low"].isnull() | boundaries["R"]["low"].isnull()
        )

        tests = [
            {
                "name": "line_in_null_spectrum",
                "description": "Do non-null line fluxes appear in completely null spectra?",
                "test": ~null_flux & null_spectrum,
            },
            {
                "name": "line_in_blue_chip_gap",
                "description": "Do non-null line fluxes appear in the blue chip gap?",
                "test": ~null_flux & is_in_blue_gap,
            },
            {
                "name": "line_in_red_chip_gap",
                "description": "Do non-null line fluxes appear in the red chip gap?",
                "test": ~null_flux & is_in_red_gap,
            },
            {
                "name": "line_off_spectrum",
                "description": "Do non-null line fluxes appear outside the observed wavelength range?",
                "test": ~null_flux & is_off_spectrum,
            },
            {
                "name": "null_line_on_spectrum",
                "description": "Do null line fluxes appear in an observed wavelength range?",
                "test": null_flux & is_on_spectrum,
            },
        ]
        return tests

In [None]:
# Instantiate the diagnostic and run it with the keyword `date="201*"`.
tests = LineFluxCheck()
tests.run(date="201*")

Locating and converting where necessary: 100%|██████████| 17/17 [00:00<00:00, 3093.95it/s]
Reading netCDF files... took 1.71 s. Size is 4851.652 Mb
Locating and converting where necessary: 100%|██████████| 17/17 [00:00<00:00, 4429.04it/s]
Reading netCDF files... took 2.94 s. Size is 77.962 Mb
Locating and converting where necessary: 100%|██████████| 17/17 [00:00<00:00, 8410.38it/s]
Reading netCDF files... took 3.16 s. Size is 509.241 Mb
Tests took 5.23 s to perform.
line_in_null_spectrum:
    Do non-null line fluxes appear in completely null spectra?
line_in_blue_chip_gap:
    Do non-null line fluxes appear in the blue chip gap?
line_in_red_chip_gap:
    Do non-null line fluxes appear in the red chip gap?
line_off_spectrum:
    Do non-null line fluxes appear outside the observed wavelength range?
null_line_on_spectrum:
    Do null line fluxes appear in an observed wavelength range?


In [None]:
# Summarise the results by OBID, with `top=None` and passed tests shown.
tests.summary(by="OBID", top=None, show_passed_tests=True)

Unnamed: 0_level_0,failed,failed,failed,failed,failed,total fails
test,line_in_null_spectrum,line_in_blue_chip_gap,line_in_red_chip_gap,line_off_spectrum,null_line_on_spectrum,Unnamed: 6_level_1
OBID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
3900,0,162,226,2646,2,3036
3372,0,144,178,2695,4,3021
3756,0,161,194,2651,1,3007
3653,0,165,196,2558,1,2920
3295,0,141,200,2541,0,2882
3803,0,164,195,2519,3,2881
3806,0,165,202,2497,4,2868
3802,0,158,185,2508,5,2856
3217,0,37,87,1781,1,1906
3346,0,22,94,1684,1,1801


In [None]:
# Summarise the results by LINE.
tests.summary(by="LINE")

Unnamed: 0_level_0,failed,failed,failed,failed,total fails
test,line_in_blue_chip_gap,line_in_red_chip_gap,line_off_spectrum,null_line_on_spectrum,Unnamed: 5_level_1
LINE,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
[ArIII]_7135.67,0,55,4343,0,4398
[SII2]_6730.68,0,53,3583,0,3636
[SII]_6716.31,0,51,3576,0,3627
[NII]_6583.34,0,47,3361,0,3408
Ha_6562.80,0,52,3313,0,3365
[OI]_6300.20,0,73,2776,0,2849
HeI_5875.60,1,136,1837,1,1975
HeII_3203.15,160,0,1159,2,1321
[NeV]_3345.81,135,0,969,0,1104
[NeV]_3425.81,115,0,871,1,987


In [None]:
# Summarise the results by APS_ID, with `top=10`.
tests.summary(by="APS_ID", top=10)

Unnamed: 0_level_0,failed,failed,failed,failed,total fails
test,line_in_blue_chip_gap,line_in_red_chip_gap,line_off_spectrum,null_line_on_spectrum,Unnamed: 5_level_1
APS_ID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
456,0,5,79,0,84
766,1,7,74,0,82
728,3,3,74,0,80
989,2,8,69,0,79
746,1,1,72,0,74
615,5,15,53,0,73
40,1,5,67,0,73
62,2,6,64,0,72
308,4,3,65,0,72
273,2,3,67,0,72


In [None]:
# |hide
# Run the nbdev export for this notebook.
import nbdev

nbdev.nbdev_export()