# Raw spectrum value check

> Check that all raw spectra have finite, non-negative values and unsaturated pixels in their counts arrays.

In [None]:
# |default_exp diagnostics.raw_spectrum_value_check

In [None]:
# |export

import numpy as np

from qagmire.data import (
    get_lr_raw_files,
    read_raw_data,
)
from qagmire.quality_assurance import Diagnostics

To write checks of the data, we create a subclass of `Diagnostics` and implement the `tests` method.

In [None]:
# |export


class RawSpectrumValueCheck(Diagnostics):
    """Raw spectrum value check.

    A reproduction of the class with the same name in the weaveio
    [value_checks](https://github.com/bamford/QAG/blob/master/diagnostics/value_checks.py).

    This tests for the following cases:

    * Are there too many pixels saturated above the ADU threshold?
    * Are there negative pixel values?
    * Are there non-finite pixel values?

    for both counts1 and counts2.

    A pixel is counted as saturated when its value is greater than or equal
    to `saturation_limit_adu`.
    """

    def __init__(
        self,
        saturation_limit_adu: float = 65535,  # the saturation limit in ADU
        n_allowed_saturated_pixels: int = 0,  # the number of allowed saturated pixels per counts array
        **kwargs,  # additional keyword arguments are passed to the `Diagnostics` constructor
    ):
        self.saturation_limit_adu = saturation_limit_adu
        self.n_allowed_saturated_pixels = n_allowed_saturated_pixels
        super().__init__(**kwargs)

    def tests(self, **kwargs):
        """Build the saturation, negative-value and non-finite-value tests.

        All keyword arguments are forwarded to `get_lr_raw_files` to select the
        raw files, which are then loaded with `read_raw_data`.

        Returns a list of dicts, three for `counts1` followed by three for
        `counts2`. Each dict has the keys `name`, `description` and `test`,
        where `test` is the boolean result of the check reduced over the
        `dim_0` and `dim_1` dimensions (true means the problem is present).
        """
        files = get_lr_raw_files(**kwargs)
        # NOTE(review): `data` is presumably an xarray Dataset holding the
        # `counts1` and `counts2` arrays -- confirm against `read_raw_data`.
        data = read_raw_data(files)

        sat = data >= self.saturation_limit_adu
        neg = data < 0
        nan = ~np.isfinite(data)

        # Reduce over the two pixel dimensions; any other dimensions are kept.
        pixel_dims = ["dim_0", "dim_1"]
        count_sat = sat.sum(dim=pixel_dims)
        any_neg = neg.any(dim=pixel_dims)
        any_nan = nan.any(dim=pixel_dims)

        # The same three checks apply to each counts array, so generate them in
        # a loop rather than repeating the dicts by hand.
        tests = []
        for counts in ("counts1", "counts2"):
            tests.extend(
                [
                    {
                        "name": f"too_many_sat_in_{counts}",
                        "description": f"Are there too many pixels saturated above the ADU threshold in {counts}?",
                        "test": count_sat[counts] > self.n_allowed_saturated_pixels,
                    },
                    {
                        "name": f"neg_pixels_in_{counts}",
                        "description": f"Are there negative pixel values in {counts}?",
                        "test": any_neg[counts],
                    },
                    {
                        "name": f"nan_pixels_in_{counts}",
                        "description": f"Are there non-finite pixel values in {counts}?",
                        "test": any_nan[counts],
                    },
                ]
            )
        return tests

## Demonstration tests

Here we use multiple `dask` workers to speed up this test. We are checking 10 billion pixel values. On a single core, this takes ~110 seconds. With 8 workers, it takes ~22 seconds, although timings vary between runs (the run recorded below took ~38 seconds).

In [None]:
# `n_processes` is not a parameter of `RawSpectrumValueCheck` itself; it is
# forwarded through `**kwargs` to the `Diagnostics` constructor.
tests = RawSpectrumValueCheck(n_processes=8)
# `run` is not defined on `RawSpectrumValueCheck`, so it is inherited. The
# `date` pattern presumably reaches `get_lr_raw_files` via `tests(**kwargs)`
# to select which raw files are checked -- TODO confirm.
tests.run(date="201*")

Locating and converting where necessary: 100%|██████████| 252/252 [00:00<00:00, 6669.81it/s]
Reading netCDF files... took 4.56 s. Size is 37566.843 Mb
Tests took 37.80 s to perform.
too_many_sat_in_counts1:
    Are there too many pixels saturated above the ADU threshold in counts1?
neg_pixels_in_counts1:
    Are there negative pixel values in counts1?
nan_pixels_in_counts1:
    Are there non-finite pixel values in counts1?
too_many_sat_in_counts2:
    Are there too many pixels saturated above the ADU threshold in counts2
neg_pixels_in_counts2:
    Are there negative pixel values in counts2?
nan_pixels_in_counts2:
    Are there non-finite pixel values in counts2?


In [None]:
# Summarise the test results per file. Judging by the output below, only tests
# and files with at least one failure are listed -- confirm against `Diagnostics.summary`.
tests.summary()

Unnamed: 0_level_0,failed,failed,total fails
test,too_many_sat_in_counts1,too_many_sat_in_counts2,Unnamed: 3_level_1
filename,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
r1002213,True,False,1
r1002215,True,False,1
r1002217,True,False,1
r1002219,False,True,1
r1002243,False,True,1
r1002307,False,True,1
r1003335,False,True,1


In [None]:
# |hide
import nbdev

# Write this notebook's exported cells out to the library's Python modules.
nbdev.nbdev_export()