# Raw spectrum value check

> Check that the counts arrays of all raw spectra contain only finite, non-negative values and no saturated pixels.

In [None]:
# |default_exp diagnostics.raw_spectrum_value_check

In [None]:
# |export

import numpy as np
import xarray as xr

from qagmire.data import (
    get_lr_raw_files,
    read_raw_data,
)
from qagmire.quality_assurance import Diagnostics

To write checks of the data, we create a subclass of `Diagnostics` and implement the `tests` method.

In [None]:
# |export


class RawSpectrumValueCheck(Diagnostics):
    """Check the pixel values stored in the raw spectra.

    A reproduction of the class with the same name in the weaveio
    [value_checks](https://github.com/bamford/QAG/blob/master/diagnostics/value_checks.py).

    For each of the `counts1` and `counts2` arrays, the following questions are asked:

    * Are there too many pixels at or above the saturation limit (in ADU)?
    * Are there negative pixel values?
    * Are there non-finite pixel values?
    """

    def __init__(
        self,
        saturation_limit_adu: float = 65535,  # pixels at or above this ADU value are counted as saturated
        n_allowed_saturated_pixels: int = 0,  # saturated pixels tolerated per counts array before the test fails
        **kwargs,  # forwarded unchanged to the `Diagnostics` constructor
    ):
        # The thresholds are stored before delegating to the base-class constructor.
        self.n_allowed_saturated_pixels = n_allowed_saturated_pixels
        self.saturation_limit_adu = saturation_limit_adu
        super().__init__(**kwargs)

    def tests(self, **kwargs):
        """Read the raw data and build the list of value tests.

        Keyword arguments are passed to `get_lr_raw_files` to select the files.
        The loaded data is kept in `self.data` and the number of flagged pixels
        for each counts array in `self.stats`. Returns a list of dictionaries,
        each with `name`, `description` and `test` entries.
        """
        self.data = read_raw_data(get_lr_raw_files(**kwargs))
        self.stats = xr.Dataset()
        summed_dims = ["dim_0", "dim_1"]
        checks = []
        for index in (1, 2):
            label = f"counts{index}"
            counts = self.data[label]
            # Boolean masks of the flagged pixels, keyed by the suffix of the
            # statistic they feed; insertion order fixes the order in `self.stats`.
            flagged = {
                "sat": counts >= self.saturation_limit_adu,
                "neg": counts < 0,
                "nan": ~np.isfinite(counts),
            }
            for kind, mask in flagged.items():
                self.stats[f"{label}_{kind}"] = mask.sum(dim=summed_dims)

            # The tests are built from the values stored in `self.stats`.
            checks.append(
                {
                    "name": f"too_many_sat_in_{label}",
                    "description": f"Are there too many pixels saturated above the ADU threshold in {label}?",
                    "test": self.stats[f"{label}_sat"]
                    > self.n_allowed_saturated_pixels,
                }
            )
            checks.append(
                {
                    "name": f"neg_pixels_in_{label}",
                    "description": f"Are there negative pixel values in {label}?",
                    "test": self.stats[f"{label}_neg"] > 0,
                }
            )
            checks.append(
                {
                    "name": f"nan_pixels_in_{label}",
                    "description": f"Are there non-finite pixel values in {label}?",
                    "test": self.stats[f"{label}_nan"] > 0,
                }
            )
        return checks

## Demonstration tests

Here we use multiple `dask` workers to speed up this test. We are checking 10 billion pixel values. On a single core, this takes ~110 seconds. With 8 workers, it takes ~22 seconds.

In [None]:
# Run the check with 8 worker processes, using the default saturation settings.
# NOTE(review): `date="201*"` is presumably a pattern selecting the raw files by
# date (forwarded to `get_lr_raw_files`) - confirm against `Diagnostics.run`.
tests = RawSpectrumValueCheck(n_processes=8)
tests.run(date="201*")

Locating and converting where necessary:   0%|                                                                                     | 0/252 [00:00<?, ?it/s]

Locating and converting where necessary: 100%|█████████████████████████████████████████████████████████████████████████| 252/252 [00:00<00:00, 6093.70it/s]


Reading netCDF files... 

took 6.40 s. Size is 37566.851 Mb
Tests took 10.47 s to prepare (including reading data).


Tests took 32.23 s to perform.
too_many_sat_in_counts1:
    Are there too many pixels saturated above the ADU threshold in counts1?
neg_pixels_in_counts1:
    Are there negative pixel values in counts1?
nan_pixels_in_counts1:
    Are there non-finite pixel values in counts1?
too_many_sat_in_counts2:
    Are there too many pixels saturated above the ADU threshold in counts2?
neg_pixels_in_counts2:
    Are there negative pixel values in counts2?
nan_pixels_in_counts2:
    Are there non-finite pixel values in counts2?


In [None]:
# Report how many tests passed and failed, and tabulate the files with at least one failure.
tests.summary()

6 varieties of test and 252 tested elements per variety, for total of 1512 tests.
7 tests failed (0.46%) and 1505 tests passed (99.54%).


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,failed,failed,total fails
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,test,too_many_sat_in_counts1,too_many_sat_in_counts2,Unnamed: 8_level_1
filename,RUN,CAMERA,MJD,NIGHT,OBID,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
r1002213,1002213,RED,57639.865255,20160908,3191,True,False,1
r1002215,1002215,RED,57639.878449,20160908,3191,True,False,1
r1002217,1002217,RED,57639.891644,20160908,3191,True,False,1
r1002219,1002219,RED,57639.904838,20160908,3191,False,True,1
r1002243,1002243,RED,57639.993993,20160908,3346,False,True,1
r1002307,1002307,RED,57640.999641,20160909,3217,False,True,1
r1003335,1003335,RED,57809.064769,20170224,3900,False,True,1


## Validation

In [None]:
# Tabulate the per-file pixel counts, indexed by every coordinate of the stats dataset.
coord_names = list(tests.stats.coords)
df = tests.stats.to_dataframe().reset_index().set_index(coord_names)
# Keep only the files with at least one flagged pixel in any column.
has_flagged_pixels = df.sum(axis="columns") > 0
df = df.loc[has_flagged_pixels]
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,counts1_sat,counts1_neg,counts1_nan,counts2_sat,counts2_neg,counts2_nan
filename,RUN,CAMERA,MJD,NIGHT,OBID,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
r1002217,1002217,RED,57639.891644,20160908,3191,4,0,0,0,0,0
r1002219,1002219,RED,57639.904838,20160908,3191,0,0,0,1,0,0
r1002213,1002213,RED,57639.865255,20160908,3191,1,0,0,0,0,0
r1002215,1002215,RED,57639.878449,20160908,3191,1,0,0,0,0,0
r1002243,1002243,RED,57639.993993,20160908,3346,0,0,0,2,0,0
r1002307,1002307,RED,57640.999641,20160909,3217,0,0,0,1,0,0
r1003335,1003335,RED,57809.064769,20170224,3900,0,0,0,1,0,0


In [None]:
# |hide
# Export the cells marked `# |export` to the module named by `# |default_exp`.
import nbdev

nbdev.nbdev_export()