# Raw spectrum value check

> Check that the counts arrays of all raw spectra contain only finite, non-negative values and not too many saturated pixels.

In [None]:
# |default_exp diagnostics.raw_spectrum_value_check

In [None]:
# |export

import numpy as np

from qagmire.data import (
    get_lr_raw_files,
    read_raw_data,
)
from qagmire.quality_assurance import Diagnostics

To write checks of the data, we create a subclass of `Diagnostics` and implement the `tests` method.

In [None]:
# |export


class RawSpectrumValueCheck(Diagnostics):
    """Raw spectrum value check.

    A reproduction of the class with the same name in the weaveio
    [value_checks](https://github.com/bamford/QAG/blob/master/diagnostics/value_checks.py).

    This tests for the following cases:

    * Are there too many pixels saturated at or above the ADU threshold?
    * Are there negative pixel values?
    * Are there non-finite pixel values?

    for both counts1 and counts2.
    """

    def __init__(
        self,
        saturation_limit_adu: float = 65535,  # the saturation limit in ADU
        n_allowed_saturated_pixels: int = 0,  # the number of allowed saturated pixels per counts array
        **kwargs,  # additional keyword arguments are passed to the `Diagnostics` constructor
    ):
        self.saturation_limit_adu = saturation_limit_adu
        self.n_allowed_saturated_pixels = n_allowed_saturated_pixels
        super().__init__(**kwargs)

    def tests(self, **kwargs):
        """Build the saturation, negative-value and non-finite tests for each counts array.

        The keyword arguments are passed to `get_lr_raw_files` to select the raw files.

        Returns a list of dictionaries, each with the keys `name`, `description`
        and `test`. Each `test` entry is the result of reducing a boolean mask over
        the "dim_0" and "dim_1" dimensions of one counts array, and is truthy where
        the problem named by `name` is present.
        """
        files = get_lr_raw_files(**kwargs)
        # NOTE(review): presumably an xarray Dataset holding "counts1" and "counts2"
        # variables with pixel dimensions "dim_0" and "dim_1" -- confirm against `read_raw_data`.
        data = read_raw_data(files)

        pixel_dims = ["dim_0", "dim_1"]

        # The comparison is inclusive: a pixel exactly at the limit counts as saturated.
        count_sat = (data >= self.saturation_limit_adu).sum(dim=pixel_dims)
        any_neg = (data < 0).any(dim=pixel_dims)
        any_nan = (~np.isfinite(data)).any(dim=pixel_dims)

        # Generate the same three tests for each counts array, so that the names and
        # descriptions cannot drift apart (previously the counts2 saturation
        # description was missing its question mark).
        tests = []
        for counts in ("counts1", "counts2"):
            tests.extend(
                [
                    {
                        "name": f"too_many_sat_in_{counts}",
                        "description": f"Are there too many pixels saturated above the ADU threshold in {counts}?",
                        "test": count_sat[counts] > self.n_allowed_saturated_pixels,
                    },
                    {
                        "name": f"neg_pixels_in_{counts}",
                        "description": f"Are there negative pixel values in {counts}?",
                        "test": any_neg[counts],
                    },
                    {
                        "name": f"nan_pixels_in_{counts}",
                        "description": f"Are there non-finite pixel values in {counts}?",
                        "test": any_nan[counts],
                    },
                ]
            )
        return tests

## Demonstration tests

Here we use multiple `dask` workers to speed up this test. We are checking 10 billion pixel values. On a single core, this takes ~110 seconds. With 8 workers, it takes ~22 seconds.

In [None]:
# `n_processes` is not an argument of `RawSpectrumValueCheck` itself; it is
# forwarded through `**kwargs` to the `Diagnostics` constructor.
tests = RawSpectrumValueCheck(n_processes=8)
# NOTE(review): `date="201*"` is presumably a wildcard used to select the raw
# files to check -- confirm how `run` forwards it to `get_lr_raw_files`.
tests.run(date="201*")

Locating and converting where necessary:   0%|                | 0/252 [00:00<?, ?it/s]

Locating and converting where necessary:   0%|        | 1/252 [00:01<07:00,  1.68s/it]

Locating and converting where necessary:   2%|▏       | 5/252 [00:01<01:08,  3.63it/s]

Locating and converting where necessary:   3%|▏       | 7/252 [00:01<00:50,  4.87it/s]

Locating and converting where necessary:   4%|▎       | 9/252 [00:03<01:29,  2.71it/s]

Locating and converting where necessary:   4%|▎      | 11/252 [00:03<01:06,  3.63it/s]

Locating and converting where necessary:   5%|▎      | 13/252 [00:03<00:52,  4.57it/s]

Locating and converting where necessary:   6%|▍      | 15/252 [00:03<00:43,  5.45it/s]

Locating and converting where necessary:   6%|▍      | 16/252 [00:05<01:30,  2.60it/s]

Locating and converting where necessary:   7%|▌      | 18/252 [00:05<01:04,  3.66it/s]

Locating and converting where necessary:   8%|▌      | 21/252 [00:05<00:46,  4.97it/s]

Locating and converting where necessary:   9%|▌      | 22/252 [00:05<00:49,  4.69it/s]

Locating and converting where necessary:   9%|▋      | 23/252 [00:06<01:20,  2.83it/s]

Locating and converting where necessary:  10%|▋      | 24/252 [00:07<01:16,  2.98it/s]

Locating and converting where necessary:  11%|▊      | 28/252 [00:07<00:40,  5.55it/s]

Locating and converting where necessary:  12%|▊      | 30/252 [00:07<00:49,  4.45it/s]

Locating and converting where necessary:  12%|▊      | 31/252 [00:08<00:56,  3.91it/s]

Locating and converting where necessary:  13%|▉      | 32/252 [00:08<01:01,  3.59it/s]

Locating and converting where necessary:  14%|▉      | 35/252 [00:09<00:48,  4.43it/s]

Locating and converting where necessary:  14%|█      | 36/252 [00:09<00:44,  4.81it/s]

Locating and converting where necessary:  15%|█      | 37/252 [00:09<00:48,  4.40it/s]

Locating and converting where necessary:  15%|█      | 38/252 [00:09<00:49,  4.29it/s]

Locating and converting where necessary:  15%|█      | 39/252 [00:10<01:09,  3.05it/s]

Locating and converting where necessary:  17%|█▏     | 42/252 [00:10<00:44,  4.75it/s]

Locating and converting where necessary:  17%|█▏     | 44/252 [00:11<00:43,  4.83it/s]

Locating and converting where necessary:  18%|█▎     | 45/252 [00:11<00:44,  4.63it/s]

Locating and converting where necessary:  18%|█▎     | 46/252 [00:12<01:10,  2.92it/s]

Locating and converting where necessary:  19%|█▎     | 48/252 [00:12<00:52,  3.86it/s]

Locating and converting where necessary:  19%|█▎     | 49/252 [00:12<00:55,  3.65it/s]

Locating and converting where necessary:  20%|█▍     | 51/252 [00:13<00:48,  4.11it/s]

Locating and converting where necessary:  21%|█▍     | 52/252 [00:13<00:46,  4.28it/s]

Locating and converting where necessary:  21%|█▌     | 54/252 [00:13<00:47,  4.18it/s]

Locating and converting where necessary:  22%|█▌     | 55/252 [00:14<00:42,  4.63it/s]

Locating and converting where necessary:  22%|█▌     | 56/252 [00:14<00:38,  5.07it/s]

Locating and converting where necessary:  23%|█▌     | 57/252 [00:14<00:46,  4.22it/s]

Locating and converting where necessary:  23%|█▌     | 58/252 [00:14<00:41,  4.63it/s]

Locating and converting where necessary:  23%|█▋     | 59/252 [00:15<00:43,  4.48it/s]

Locating and converting where necessary:  24%|█▋     | 60/252 [00:15<00:48,  3.98it/s]

Locating and converting where necessary:  24%|█▋     | 61/252 [00:15<01:01,  3.13it/s]

Locating and converting where necessary:  25%|█▋     | 62/252 [00:16<00:56,  3.37it/s]

Locating and converting where necessary:  25%|█▊     | 63/252 [00:16<00:49,  3.82it/s]

Locating and converting where necessary:  25%|█▊     | 64/252 [00:16<00:45,  4.16it/s]

Locating and converting where necessary:  26%|█▊     | 65/252 [00:16<00:39,  4.72it/s]

Locating and converting where necessary:  27%|█▊     | 67/252 [00:16<00:29,  6.30it/s]

Locating and converting where necessary:  27%|█▉     | 68/252 [00:16<00:30,  6.03it/s]

Locating and converting where necessary:  27%|█▉     | 69/252 [00:17<00:51,  3.59it/s]

Locating and converting where necessary:  28%|█▉     | 70/252 [00:17<00:49,  3.71it/s]

Locating and converting where necessary:  28%|█▉     | 71/252 [00:17<00:41,  4.41it/s]

Locating and converting where necessary:  29%|██     | 72/252 [00:18<00:34,  5.17it/s]

Locating and converting where necessary:  29%|██     | 73/252 [00:18<00:41,  4.29it/s]

Locating and converting where necessary:  30%|██     | 76/252 [00:18<00:24,  7.18it/s]

Locating and converting where necessary:  31%|██▏    | 77/252 [00:19<00:36,  4.79it/s]

Locating and converting where necessary:  31%|██▏    | 78/252 [00:19<00:51,  3.37it/s]

Locating and converting where necessary:  31%|██▏    | 79/252 [00:20<00:55,  3.10it/s]

Locating and converting where necessary:  32%|██▎    | 81/252 [00:23<02:24,  1.18it/s]

Locating and converting where necessary:  33%|██▎    | 83/252 [00:23<01:36,  1.75it/s]

Locating and converting where necessary:  34%|██▍    | 86/252 [00:24<01:29,  1.86it/s]

Locating and converting where necessary:  35%|██▍    | 88/252 [00:25<01:05,  2.51it/s]

Locating and converting where necessary:  36%|██▌    | 91/252 [00:25<00:41,  3.86it/s]

Locating and converting where necessary:  37%|██▌    | 93/252 [00:29<01:57,  1.35it/s]

Locating and converting where necessary:  37%|██▌    | 94/252 [00:30<02:09,  1.22it/s]

Locating and converting where necessary:  39%|██▋    | 98/252 [00:30<01:07,  2.28it/s]

Locating and converting where necessary:  40%|██▍   | 101/252 [00:30<00:50,  3.02it/s]

Locating and converting where necessary:  41%|██▍   | 103/252 [00:32<00:59,  2.49it/s]

Locating and converting where necessary:  42%|██▌   | 106/252 [00:32<00:40,  3.58it/s]

Locating and converting where necessary:  43%|██▌   | 108/252 [00:32<00:34,  4.21it/s]

Locating and converting where necessary:  44%|██▌   | 110/252 [00:33<00:48,  2.91it/s]

Locating and converting where necessary:  44%|██▋   | 111/252 [00:33<00:43,  3.24it/s]

Locating and converting where necessary:  45%|██▋   | 114/252 [00:34<00:27,  4.96it/s]

Locating and converting where necessary:  46%|██▊   | 116/252 [00:35<00:43,  3.10it/s]

Locating and converting where necessary:  47%|██▊   | 119/252 [00:35<00:32,  4.09it/s]

Locating and converting where necessary:  48%|██▉   | 122/252 [00:35<00:23,  5.61it/s]

Locating and converting where necessary:  49%|██▉   | 124/252 [00:36<00:33,  3.81it/s]

Locating and converting where necessary:  67%|████  | 168/252 [00:36<00:02, 31.41it/s]

Locating and converting where necessary:  98%|█████▊| 246/252 [00:37<00:00, 87.48it/s]

Locating and converting where necessary: 100%|██████| 252/252 [00:38<00:00,  6.55it/s]


Reading netCDF files... 

took 4.58 s. Size is 37566.851 Mb
Tests took 50.49 s to prepare (including reading data).


2024-03-19 16:58:42,116 - distributed.worker - ERROR - Failed to communicate with scheduler during heartbeat.
Traceback (most recent call last):
  File "/home2/bamford/anaconda/envs/qagmire/lib/python3.12/site-packages/distributed/comm/tcp.py", line 225, in read
    frames_nosplit_nbytes_bin = await stream.read_bytes(fmt_size)
                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
tornado.iostream.StreamClosedError: Stream is closed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home2/bamford/anaconda/envs/qagmire/lib/python3.12/site-packages/distributed/worker.py", line 1252, in heartbeat
    response = await retry_operation(
               ^^^^^^^^^^^^^^^^^^^^^^
  File "/home2/bamford/anaconda/envs/qagmire/lib/python3.12/site-packages/distributed/utils_comm.py", line 455, in retry_operation
    return await retry(
           ^^^^^^^^^^^^
  File "/home2/bamford/anaconda/envs/qagmire/lib/python3.12/site-pack

Tests took 27.21 s to perform.
too_many_sat_in_counts1:
    Are there too many pixels saturated above the ADU threshold in counts1?
neg_pixels_in_counts1:
    Are there negative pixel values in counts1?
nan_pixels_in_counts1:
    Are there non-finite pixel values in counts1?
too_many_sat_in_counts2:
    Are there too many pixels saturated above the ADU threshold in counts2
neg_pixels_in_counts2:
    Are there negative pixel values in counts2?
nan_pixels_in_counts2:
    Are there non-finite pixel values in counts2?


In [None]:
# Display a per-file summary of the results of the run above.
tests.summary()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,failed,failed,total fails
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,test,too_many_sat_in_counts1,too_many_sat_in_counts2,Unnamed: 8_level_1
filename,RUN,CAMERA,MJD,NIGHT,OBID,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
r1002213,1002213,RED,57639.8653,20160908,3191,True,False,1
r1002215,1002215,RED,57639.8784,20160908,3191,True,False,1
r1002217,1002217,RED,57639.8916,20160908,3191,True,False,1
r1002219,1002219,RED,57639.9048,20160908,3191,False,True,1
r1002243,1002243,RED,57639.994,20160908,3346,False,True,1
r1002307,1002307,RED,57640.9996,20160909,3217,False,True,1
r1003335,1003335,RED,57809.0648,20170224,3900,False,True,1


In [None]:
# |hide
import nbdev

# Run nbdev's export step, which writes the cells marked `# |export` to the
# module named by the `# |default_exp` directive.
nbdev.nbdev_export()