# data

> Functionality for efficiently accessing data in WEAVE fits files

In [3]:
# |default_exp data

In [4]:
# | hide
from nbdev.showdoc import show_doc

In [5]:
# |export

import os
import sys
import time
from functools import partial, wraps
from glob import glob
from multiprocessing import Pool
from typing import Callable

import numpy as np
import xarray as xr
from astropy.io import fits
from astropy.table import Table
from tqdm import tqdm

from qagmire.utilities import mjd_to_night

The `data` module provides functions for locating and reading WEAVE data as `xarray` `Dataset`s.

## Accessing FITS tables as xarray Datasets via cached netCDF files

The approach of `qagmire` is to access WEAVE data stored on disk in FITS files on disk. To analyse this large, multi-dimensional dataset we utilize [`xarray`](https://docs.xarray.dev), making use of its ability to use `dask` to perform computations in parallel in a memory efficient and scaleable manner. To make this work we need to write the data to disk in a suitable format: netCDF files, then load those files as an `xarray.Dataset`. This functionality is implemented in `FITStoDataset`, which is used to wrap simpler functions which focus on reading data from a single FITS file into a convenient `Dataset`. Reading the original FITS files, and caching them to NetCDF, is (by default) parallelised using `multiprocessing`. In general, the aim is for the resulting `Dataset`s to preserve the structure in the FITS files. However, there are some cases where it is sensible to rearrange the data into a more convenient and/or efficient format. 

In [6]:
# |export


def _read_single(
    read_function: Callable[[str], xr.Dataset],  # function to read a FITS file
    fn: str,  # filename of FITS file to read
):
    """Read a FITS file to an xarray Dataset using the given read function."""
    ds = read_function(fn)
    if ds:
        fn_base = os.path.splitext(os.path.basename(fn))[0]
        ds = ds.expand_dims({"filename": [fn_base]})
        try:
            run = fits.getval(fn, "RUN")
            ds = ds.assign_coords(RUN=("filename", [run]))
        except KeyError:
            pass
        try:
            camera = fits.getval(fn, "CAMERA")
            camera = camera.replace("WEAVE", "")
            ds = ds.assign_coords(CAMERA=("filename", [camera]))
        except KeyError:
            pass
        try:
            mjd = np.round(fits.getval(fn, "MJD-OBS"), 4)
            ds = ds.assign_coords(MJD=("filename", [mjd]))
            night = mjd_to_night(mjd)
            ds = ds.assign_coords(NIGHT=("filename", [night]))
        except KeyError:
            pass
        try:
            obid = fits.getval(fn, "OBID")
            ds = ds.assign_coords(OBID=("filename", [obid]))
        except KeyError:
            pass
    else:
        table = read_function.__name__.replace("read_", "")
        print(f"Warning: cannot read {table} for file {fn}.")
    return ds


def _single_via_netcdf(
    read_function: Callable[[str], xr.Dataset],  # function to read a FITS file
    fn: str,  # filename of FITS file to read
    netcdf_store,
    update_cache,
):
    """Transform a FITS file to netCDF using the given read function.

    Returns the path of a netCDF file stored in `nedcdf_store`, containing the `Dataset` resulting
    from calling `read_function` with the supplied FITS filename `fn`. If the netCDF file already
    exists, the filename is immediately returned.
    """
    fn_netcdf = os.path.join(netcdf_store, *os.path.normpath(fn).split(os.sep)[-3:])
    table = read_function.__name__.replace("read_", "")
    fn_netcdf = os.path.splitext(fn_netcdf)[0]
    fn_netcdf = f"{fn_netcdf}_{table}.nc"
    if not os.path.exists(fn_netcdf) or update_cache:
        ds = _read_single(read_function, fn)
        if ds:
            os.makedirs(os.path.dirname(fn_netcdf), exist_ok=True)
            ds.to_netcdf(fn_netcdf, format="NETCDF4", engine="netcdf4")
            ds.close()
        else:
            fn_netcdf = None
    return fn_netcdf


class FITStoDataset:
    """Access multiple FITS tables as an xarray Dataset, optionally via cached netCDF files.

    For each FITS table or image we wish to read, we will write a `read_*(fn)` function
    which reads the table from the single provided filename `fn` and returns a `Dataset`.
    Wrapping such a function with an instance of this class adapts the function to take
    a list of FITS filenames and return a `Dataset`. If `cache=True`, the Dataset is
    lazily loaded data from a cache of netCDF files. The cache is stored in the
    `netcdf_store` folder defined when the instance is initialised.

    If `cache=True`, when the wrapped function is initially run, it repeatedly calls
    `single_via_netcdf` to apply the original `read_*` function to each FITS filename and
    save each resulting `Dataset` as a netCDF file, then opens them together and returns a
    combined, distributed `Dataset`. If a previously converted file is found in the
    `netcdf_store`, then the original `read_*` function is skipped and the netCDF file loaded
    directly. This caching can vastly increase the speed of subsequent calls.
    If `n_processes > 1`, which it is by default, then this reading and caching is performed
    in parallel using `n_processes` processes.

    Although instances of this class can be used as a decorator, doing so with
    `n_processes > 1` will lead to an exception due to pickling issues with multiprocessing.
    Instead they should be used to wrap functions, without replacing the original function name.
    For example,
    ```
    to_dataset = FITStoDataset()

    def _class_spec_reader(fn):
        ...

    read_class_spec = to_dataset(_class_spec_reader)
    ```

    If a source FITS file is changed, the corresponding files in `netcdf_store` can simply
    be deleted and they will be recreated on the next call of the decorated `read_*`
    function.

    If `cache=False`, then the data is always read from the specified FITS files, combined
    and returned as an in-memory Dataset. This may be faster when dealing with lots of small
    files.

    If `update_cache=False`, then existing cache files are not read, but are recreated.

    """

    def __init__(
        self,
        cache=True,  # cache the dataset to netCDF files
        netcdf_store: str | None = None,  # folder in which to store the netCDF files
        progress=True,  # display a progress bar
        update_cache=False,  # read FITS files and recreate netCDF files, no effect if cache=False
        n_processes=8,  # how many subprocesses to use
    ):
        """Create a decorator that can extend a `read_*` function to multiple files.

        If no `netcdf_store` is provided it first checks for a `NETCDF_STORE` environment
        variable and falls back to a folder called `netcdf_store` in the user's home folder.
        """
        self.cache = cache
        self.update_cache = update_cache
        self.n_processes = n_processes

        if netcdf_store is not None:
            self.netcdf_store = netcdf_store
        else:
            default = "/beegfs/weavelofar/netcdf_store"
            self.netcdf_store = os.environ.get("NETCDF_STORE", default)

        if progress:
            if cache:
                desc = "Locating and converting where necessary"
            else:
                desc = "Reading files"
            self.progress = partial(tqdm, desc=desc, file=sys.stdout)
        else:
            self.progress = lambda x: x

    def __call__(
        self,
        read_function: Callable[[str], xr.Dataset],  # function to read a FITS file
    ):
        """Extend the functionality of `read_function` to multiple files.

        The wrapped `read_function` is adapted to take a list of FITS filenames and
        return a `Dataset`, which lazily loads data from a cache of netCDF files if
        `self.cache=True` (the default).
        """

        @wraps(read_function)
        def wrapper(fns):
            if self.cache:
                read = partial(
                    _single_via_netcdf,
                    read_function,
                    netcdf_store=self.netcdf_store,
                    update_cache=self.update_cache,
                )
            else:
                read = partial(_read_single, read_function)

            if self.n_processes > 1:
                results = []
                try:
                    with Pool(self.n_processes) as p:
                        with self.progress(total=len(fns)) as pbar:
                            for f in p.imap_unordered(read, fns):
                                results.append(f)
                                pbar.update()
                except PermissionError:
                    raise PermissionError(
                        "Cannot access the NetCDF file. Ensure any previously "
                        "created Datasets are closed, e.g. ds.close()"
                    )
            else:
                results = [read(fn) for fn in self.progress(fns)]

            results = [f for f in results if f is not None]

            if self.cache:
                print("Reading netCDF files... ", end="")
                start = time.perf_counter()
                data = xr.open_mfdataset(
                    results,
                    combine="nested",
                    coords="minimal",
                    concat_dim="filename",
                    engine="netcdf4",
                )
            else:
                print("Creating Dataset... ", end="")
                start = time.perf_counter()
                data = xr.concat(results, dim="filename", coords="minimal")
            dt = time.perf_counter() - start
            print(f"took {dt:.2f} s. Size is {data.nbytes * 2**-20:.3f} Mb")
            return data

        return wrapper

### Create the `to_dataset` and `to_dataset_without_cache` wrapping functions

These are used to wrap all the `read_*` functions defined below. The function `to_dataset` is an instance of `FITStoDataset` with all the default behaviour described above.

For datasets consisting of small amounts of data per FITS file, it appears to be more efficient to simply read from the FITS files every time. In such cases, we use `to_dataset_without_cache`, an instance of `FITStoDataset` with caching disabled.

Note that both of must be used to wrap functions, with the result being assigned a different name to the original. They cannot be used as decorators, unless `n_processes=1`.

In [7]:
# |exports

to_dataset = FITStoDataset()
to_dataset_without_cache = FITStoDataset(cache=False)

Existing cache files will be used when using the `qagmire.data` module. However, if running this notebook, the following line means we always update the cache, such that the timings reflect the original reading and conversion process.

In [8]:
to_dataset = FITStoDataset(update_cache=True)

## Locating WEAVE FITS files

Here we define some functions to get lists of WEAVE FITS filenames.

In [21]:
# |export


data_path = "/beegfs/weavelofar"

def _is_lowres(fn):
    """Check the header of FITS file `fn` to determine if it is low-resolution."""
    try:
        lowres = "LR" in fits.getval(fn, "RES-OBS")
    except KeyError:
        lowres = "LOWRES" in fits.getval(fn, "MODE")
    return lowres


def get_weave_files(
    level="*",  # pattern to match to the file level, e.g. raw, L1, L2
    filetype="*",  # pattern to match to the file type, e.g. single, stack
    date="*",  # pattern to match to the date in format yyyymmdd
    runid="*",  # pattern to match to the runid
    lowres=True,  # select low-res files, or high-res if False
    folder="weaveio",  # folder within the `data_path`
):
    """Get a list of matching WEAVE files."""
    if level != "raw":
        filetype += "_"
    level = "".join(f"[{c.upper()+c.lower()}]" for c in level)
    pattern = f"**/{level}/{date}*/{filetype}*{runid}*.fit*"
    pattern = os.path.join(data_path, folder, pattern)
    files = glob(pattern, recursive=True)
    files.sort()
    if lowres:
        files = [fn for fn in files if _is_lowres(fn)]
    else:
        files = [fn for fn in files if not _is_lowres(fn)]
    return files


def get_lr_raw_files(
    date="*",  # pattern to match to the date in format yyyymmdd
    runid="*",  # pattern to match to the runid
    folder="weaveio",  # folder within the `data_path`
):
    return get_weave_files(level="raw", date=date, runid=runid, lowres=True, folder=folder)


def get_lr_l1_single_files(
    date="*",  # pattern to match to the date in format yyyymmdd
    runid="*",  # pattern to match to the runid
    folder="weaveio",  # folder within the `data_path`
):
    return get_weave_files(
        level="L1", filetype="single", date=date, runid=runid, lowres=True, folder=folder
    )


def get_lr_l1_stack_files(
    date="*",  # pattern to match to the date in format yyyymmdd
    runid="*",  # pattern to match to the runid
    folder="weaveio",  # folder within the `data_path`
):
    return get_weave_files(
        level="L1", filetype="stack", date=date, runid=runid, lowres=True, folder=folder
    )


def get_lr_l2_stack_files(
    date="*",  # pattern to match to the date in format yyyymmdd
    runid="*",  # pattern to match to the runid
    folder="weaveio",  # folder within the `data_path`
):
    return get_weave_files(
        level="L2", filetype="stack", date=date, runid=runid, lowres=True, folder=folder
    )

In [18]:
# |export


def _read_fits_columns(
    fn: str,  # the filename of the FITS file to read
    ext: str,  # the name of the extension containing the table to read
    limit_precision=False,  # convert all float64 columns to float32
    index: str | None = None,  # remove rows where this column is masked
):
    """Read a FITS table to a dict of arrays and convert endianness."""
    cols = dict(Table.read(fn, ext, unit_parse_strict="silent"))
    cols = {c: cols[c].newbyteorder().byteswap() for c in cols}
    if limit_precision:
        for c in list(cols):
            if cols[c].dtype.type is np.float64:
                if np.can_cast(np.max(np.abs(cols[c])), np.float32):
                    cols[c] = cols[c].astype(np.float32)
    if index is not None:
        ok = ~cols[index].mask
        cols = {c: cols[c][ok] for c in cols}
    return cols

## Raw files

These contain the raw observations.

We will read some simulated files to show examples.

In [19]:
lr_raw_files = get_lr_raw_files(date="2017*")
print(len(lr_raw_files), "low-res raw files")

/beegfs/weavelofar/weaveio/**/[Rr][Aa][Ww]/2017*/****.fit*
120 low-res raw files


In [None]:
raw_hdus = fits.open(lr_raw_files[0])

WEAVE raw files contain six extensions:

In [None]:
print([hdu.name for hdu in raw_hdus])

['PRIMARY', 'RED1_DATA', 'RED2_DATA', 'FIBTABLE', 'GUIDINFO', 'METINFO']


### PRIMARY

The PRIMARY extension contains only a header with lots of information about the observation.

In [None]:
raw_hdus["PRIMARY"].header[:15]

SIMPLE  =                    T / conforms to FITS standard                      
BITPIX  =                    8 / array data type                                
NAXIS   =                    0 / number of array dimensions                     
EXTEND  =                    T                                                  
COMMENT -------- Start of the CAMERA Packet -------                             
RUN     =              1003313                                                  
IRAFNAME= 'r1003313'           / redir r2840373.fit > r1003313                  
DETECTOR= 'WVRED   '           / Selected by inference                          
CCDSPEED= 'SLOW    '                                                            
CCDXBIN =                    1                                                  
CCDYBIN =                    1                                                  
CCDSUM  = '1 1     '                                                            
CCDTEMP =    132.05553788667

In [None]:
# |export


def _primary_header_reader(fn):
    """Read the primary header as a Dataset, stripping comments."""
    hdr = fits.getheader(fn, "PRIMARY")
    for key in hdr:
        if key == "" or "COMM" in key:
            del hdr[key]
    return xr.Dataset(hdr)


read_primary_header = to_dataset_without_cache(_primary_header_reader)

In [None]:
# |hide
show_doc(read_primary_header, name="read_primary_header", title_level=4)

---

[source](https://github.com/bamford/qagmire/blob/main/qagmire/data.py#L310){target="_blank" style="float:right; font-size:smaller"}

#### read_primary_header

>      read_primary_header (fn)

Read the primary header as a Dataset, stripping comments.

In [None]:
raw_primary_header = read_primary_header(lr_raw_files)

Reading files: 100%|██████████| 120/120 [00:01<00:00, 90.27it/s]
Creating Dataset... took 1.89 s. Size is 0.604 Mb


Reading files:  22%|███████▏                         | 26/120 [00:00<00:01, 92.55it/s]

Reading files:  30%|█████████▉                       | 36/120 [00:00<00:00, 90.28it/s]

Reading files:  38%|████████████▋                    | 46/120 [00:00<00:01, 63.06it/s]

Reading files:  45%|██████████████▊                  | 54/120 [00:00<00:01, 64.00it/s]

Reading files:  53%|█████████████████▌               | 64/120 [00:00<00:00, 69.64it/s]

Reading files:  63%|████████████████████▉            | 76/120 [00:01<00:00, 75.68it/s]

Reading files:  72%|███████████████████████▉         | 87/120 [00:01<00:00, 77.86it/s]

Reading files:  82%|██████████████████████████▉      | 98/120 [00:01<00:00, 75.17it/s]

Reading files:  88%|████████████████████████████▎   | 106/120 [00:01<00:00, 63.35it/s]

Reading files:  98%|███████████████████████████████▏| 117/120 [00:01<00:00, 72.31it/s]

Reading files: 100%|████████████████████████████████| 120/120 [00:04<00:00, 25.90it/s]


Creating Dataset... 

took 1.67 s. Size is 0.604 Mb


In [None]:
print(raw_primary_header)

<xarray.Dataset> Size: 633kB
Dimensions:   (filename: 120)
Coordinates:
  * filename  (filename) object 960B 'r1003313' 'r1003317' ... 'r1004149'
    RUN       (filename) int64 960B 1003313 1003317 1003318 ... 1004151 1004149
    CAMERA    (filename) <U4 2kB 'RED' 'RED' 'BLUE' ... 'BLUE' 'RED' 'RED'
    OBID      (filename) int64 960B 3802 3802 3802 3802 ... 3936 3936 3936 3936
    MJD       (filename) float64 960B 5.781e+04 5.781e+04 ... 5.803e+04
    NIGHT     (filename) <U8 4kB '20170224' '20170224' ... '20170930' '20170930'
Data variables: (12/410)
    SIMPLE    (filename) bool 120B True True True True ... True True True True
    BITPIX    (filename) int64 960B 8 8 8 8 8 8 8 8 8 8 ... 8 8 8 8 8 8 8 8 8 8
    NAXIS     (filename) int64 960B 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
    EXTEND    (filename) bool 120B True True True True ... True True True True
    IRAFNAME  (filename) <U8 4kB 'r1003313' 'r1003317' ... 'r1004151' 'r1004149'
    DETECTOR  (filename) <U6 3kB 'WVRED' '

### DATA

The RED1_DATA, RED2_DATA and BLUE1_DATA, BLUE2_DATA extensions contain raw imaging of the spectra.

In [None]:
# |export


def _raw_data_reader(fn):
    """Read the *_DATA from a WEAVE RAW FITS file as a Dataset."""
    hdus = fits.open(fn)
    for h in hdus:
        if h.name.endswith("1_DATA"):
            counts1 = xr.DataArray(h.data)
        elif h.name.endswith("2_DATA"):
            counts2 = xr.DataArray(h.data)
    return xr.Dataset({"counts1": counts1, "counts2": counts2})


read_raw_data = to_dataset(_raw_data_reader)

In [None]:
# |hide
show_doc(read_raw_data, name="read_raw_data", title_level=4)

---

[source](https://github.com/bamford/qagmire/blob/main/qagmire/data.py#L322){target="_blank" style="float:right; font-size:smaller"}

#### read_raw_data

>      read_raw_data (fn)

Read the *_DATA from a WEAVE RAW FITS file as a Dataset.

In [None]:
raw_data = read_raw_data(lr_raw_files)

Locating and converting where necessary: 100%|██████████| 120/120 [00:26<00:00,  4.58it/s]
Reading netCDF files... took 2.92 s. Size is 17888.977 Mb


Locating and converting where necessary:   8%|▌      | 10/120 [00:03<00:39,  2.78it/s]

Locating and converting where necessary:  12%|▉      | 15/120 [00:03<00:20,  5.13it/s]

Locating and converting where necessary:  15%|█      | 18/120 [00:05<00:29,  3.51it/s]

Locating and converting where necessary:  18%|█▏     | 21/120 [00:05<00:21,  4.70it/s]

Locating and converting where necessary:  19%|█▎     | 23/120 [00:05<00:18,  5.19it/s]

Locating and converting where necessary:  21%|█▍     | 25/120 [00:07<00:28,  3.31it/s]

Locating and converting where necessary:  22%|█▌     | 27/120 [00:07<00:23,  3.93it/s]

Locating and converting where necessary:  25%|█▊     | 30/120 [00:08<00:27,  3.29it/s]

Locating and converting where necessary:  26%|█▊     | 31/120 [00:08<00:25,  3.53it/s]

Locating and converting where necessary:  28%|█▉     | 34/120 [00:09<00:19,  4.31it/s]

Locating and converting where necessary:  31%|██▏    | 37/120 [00:10<00:22,  3.67it/s]

Locating and converting where necessary:  32%|██▏    | 38/120 [00:10<00:20,  3.99it/s]

Locating and converting where necessary:  33%|██▎    | 40/120 [00:10<00:17,  4.53it/s]

Locating and converting where necessary:  35%|██▍    | 42/120 [00:10<00:13,  5.75it/s]

Locating and converting where necessary:  36%|██▌    | 43/120 [00:10<00:13,  5.92it/s]

Locating and converting where necessary:  37%|██▌    | 44/120 [00:11<00:21,  3.47it/s]

Locating and converting where necessary:  38%|██▋    | 45/120 [00:11<00:19,  3.90it/s]

Locating and converting where necessary:  38%|██▋    | 46/120 [00:11<00:18,  4.04it/s]

Locating and converting where necessary:  39%|██▋    | 47/120 [00:12<00:17,  4.18it/s]

Locating and converting where necessary:  42%|██▉    | 50/120 [00:12<00:10,  6.49it/s]

Locating and converting where necessary:  42%|██▉    | 51/120 [00:12<00:16,  4.20it/s]

Locating and converting where necessary:  43%|███    | 52/120 [00:13<00:17,  3.90it/s]

Locating and converting where necessary:  45%|███▏   | 54/120 [00:13<00:13,  5.06it/s]

Locating and converting where necessary:  46%|███▏   | 55/120 [00:13<00:13,  4.65it/s]

Locating and converting where necessary:  48%|███▍   | 58/120 [00:13<00:08,  7.05it/s]

Locating and converting where necessary:  49%|███▍   | 59/120 [00:14<00:16,  3.63it/s]

Locating and converting where necessary:  51%|███▌   | 61/120 [00:15<00:14,  4.19it/s]

Locating and converting where necessary:  52%|███▌   | 62/120 [00:15<00:13,  4.23it/s]

Locating and converting where necessary:  54%|███▊   | 65/120 [00:15<00:08,  6.28it/s]

Locating and converting where necessary:  55%|███▊   | 66/120 [00:16<00:13,  3.96it/s]

Locating and converting where necessary:  56%|███▉   | 67/120 [00:16<00:12,  4.10it/s]

Locating and converting where necessary:  57%|███▉   | 68/120 [00:16<00:10,  4.73it/s]

Locating and converting where necessary:  57%|████   | 69/120 [00:16<00:11,  4.32it/s]

Locating and converting where necessary:  58%|████   | 70/120 [00:17<00:11,  4.20it/s]

Locating and converting where necessary:  60%|████▏  | 72/120 [00:17<00:07,  6.32it/s]

Locating and converting where necessary:  62%|████▎  | 74/120 [00:17<00:09,  4.88it/s]

Locating and converting where necessary:  62%|████▍  | 75/120 [00:18<00:10,  4.44it/s]

Locating and converting where necessary:  63%|████▍  | 76/120 [00:18<00:10,  4.20it/s]

Locating and converting where necessary:  65%|████▌  | 78/120 [00:18<00:08,  4.80it/s]

Locating and converting where necessary:  68%|████▋  | 81/120 [00:18<00:05,  6.72it/s]

Locating and converting where necessary:  68%|████▊  | 82/120 [00:19<00:09,  4.09it/s]

Locating and converting where necessary:  69%|████▊  | 83/120 [00:20<00:10,  3.44it/s]

Locating and converting where necessary:  71%|████▉  | 85/120 [00:20<00:07,  4.62it/s]

Locating and converting where necessary:  73%|█████▏ | 88/120 [00:20<00:05,  6.11it/s]

Locating and converting where necessary:  74%|█████▏ | 89/120 [00:21<00:08,  3.80it/s]

Locating and converting where necessary:  75%|█████▎ | 90/120 [00:21<00:08,  3.65it/s]

Locating and converting where necessary:  77%|█████▎ | 92/120 [00:21<00:06,  4.55it/s]

Locating and converting where necessary:  78%|█████▍ | 94/120 [00:22<00:05,  5.13it/s]

Locating and converting where necessary:  79%|█████▌ | 95/120 [00:22<00:07,  3.24it/s]

Locating and converting where necessary:  80%|█████▌ | 96/120 [00:23<00:07,  3.40it/s]

Locating and converting where necessary:  81%|█████▋ | 97/120 [00:23<00:06,  3.54it/s]

Locating and converting where necessary:  82%|█████▊ | 99/120 [00:23<00:03,  5.30it/s]

Locating and converting where necessary:  83%|█████ | 100/120 [00:23<00:04,  4.57it/s]

Locating and converting where necessary:  84%|█████ | 101/120 [00:24<00:06,  3.02it/s]

Locating and converting where necessary:  85%|█████ | 102/120 [00:24<00:05,  3.42it/s]

Locating and converting where necessary:  86%|█████▏| 103/120 [00:25<00:05,  3.18it/s]

Locating and converting where necessary:  88%|█████▎| 105/120 [00:25<00:03,  4.83it/s]

Locating and converting where necessary:  88%|█████▎| 106/120 [00:25<00:02,  4.95it/s]

Locating and converting where necessary:  89%|█████▎| 107/120 [00:25<00:03,  3.53it/s]

Locating and converting where necessary:  90%|█████▍| 108/120 [00:26<00:02,  4.15it/s]

Locating and converting where necessary:  92%|█████▌| 110/120 [00:26<00:02,  4.93it/s]

Locating and converting where necessary:  92%|█████▌| 111/120 [00:26<00:02,  4.15it/s]

Locating and converting where necessary:  94%|█████▋| 113/120 [00:26<00:01,  5.46it/s]

Locating and converting where necessary:  95%|█████▋| 114/120 [00:27<00:01,  3.80it/s]

Locating and converting where necessary:  97%|█████▊| 116/120 [00:27<00:00,  5.25it/s]

Locating and converting where necessary:  98%|█████▊| 117/120 [00:27<00:00,  4.60it/s]

Locating and converting where necessary:  98%|█████▉| 118/120 [00:28<00:00,  4.36it/s]

Locating and converting where necessary:  99%|█████▉| 119/120 [00:28<00:00,  4.81it/s]

Locating and converting where necessary: 100%|██████| 120/120 [00:30<00:00,  1.49it/s]

Locating and converting where necessary: 100%|██████| 120/120 [00:30<00:00,  3.96it/s]


Reading netCDF files... 

took 2.40 s. Size is 17888.977 Mb


In [None]:
print(raw_data)

<xarray.Dataset> Size: 19GB
Dimensions:   (filename: 120, dim_0: 6160, dim_1: 6344)
Coordinates:
  * filename  (filename) <U8 4kB 'r1003315' 'r1003314' ... 'r1004151' 'r1004152'
    RUN       (filename) int64 960B dask.array<chunksize=(1,), meta=np.ndarray>
    CAMERA    (filename) <U4 2kB dask.array<chunksize=(1,), meta=np.ndarray>
    MJD       (filename) float64 960B dask.array<chunksize=(1,), meta=np.ndarray>
    NIGHT     (filename) <U8 4kB dask.array<chunksize=(1,), meta=np.ndarray>
    OBID      (filename) int64 960B dask.array<chunksize=(1,), meta=np.ndarray>
Dimensions without coordinates: dim_0, dim_1
Data variables:
    counts1   (filename, dim_0, dim_1) uint16 9GB dask.array<chunksize=(1, 6160, 6344), meta=np.ndarray>
    counts2   (filename, dim_0, dim_1) uint16 9GB dask.array<chunksize=(1, 6160, 6344), meta=np.ndarray>


It is a good idea to close a `Dataset` when you are finished with it, as otherwise other processes may not be able to access the same underlying files.

In [None]:
raw_data.close()

### FIBTABLE

The FIBTABLE extension contains information about the fibre allocations.

Code is included to display example content of FITS tables, but commented out as it does not display well online.

In [None]:
# Table.read(raw_hdus["FIBTABLE"])

In [None]:
# |export


def _fibre_table_reader_indexed(fn, index_by_nspec=True):
    cols = _read_fits_columns(fn, "FIBTABLE", index="FIBREID")
    cols = {c.upper(): cols[c] for c in cols}
    if not cols:
        return None
    coords = dict(APS_ID=cols.pop("FIBREID"))
    for c in cols:
        dims = ["APS_ID"]
        cols[c] = xr.Variable(dims, cols[c], attrs={"unit": str(cols[c].unit)})
    ds = xr.Dataset(cols, coords)
    if index_by_nspec:
        ds = ds.set_coords("NSPEC").swap_dims(APS_ID="NSPEC").reset_coords("APS_ID")
    return ds


def _fibre_table_reader(fn):
    """Read the FIBTABLE from a WEAVE RAW or L1 FITS file as a Dataset.

    All quantities are indexed by the `APS_ID` of the fibre.

    All column names are mde uppercase for consistency.
    """
    return _fibre_table_reader_indexed(fn, index_by_nspec=False)


def _fibre_table_reader_nspec(fn):
    """Read the FIBTABLE from a WEAVE L1 FITS file as a Dataset.

    All quantities are indexed by the `NSPEC` of the fibre.

    All column names are mde uppercase for consistency.
    """
    return _fibre_table_reader_indexed(fn, index_by_nspec=True)


read_fibre_table = to_dataset_without_cache(_fibre_table_reader)
read_fibre_table_nspec = to_dataset_without_cache(_fibre_table_reader_nspec)

In [None]:
# |hide
show_doc(read_fibre_table, name="read_fibre_table", title_level=4)

---

[source](https://github.com/bamford/qagmire/blob/main/qagmire/data.py#L351){target="_blank" style="float:right; font-size:smaller"}

#### read_fibre_table

>      read_fibre_table (fn)

Read the FIBTABLE from a WEAVE RAW or L1 FITS file as a Dataset.

All quantities are indexed by the `APS_ID` of the fibre.

All column names are mde uppercase for consistency.

In [None]:
# |hide
show_doc(read_fibre_table_nspec, name="read_fibre_table_nspec", title_level=4)

---

[source](https://github.com/bamford/qagmire/blob/main/qagmire/data.py#L361){target="_blank" style="float:right; font-size:smaller"}

#### read_fibre_table_nspec

>      read_fibre_table_nspec (fn)

Read the FIBTABLE from a WEAVE L1 FITS file as a Dataset.

All quantities are indexed by the `NSPEC` of the fibre.

All column names are mde uppercase for consistency.

In [None]:
raw_fibre_table = read_fibre_table(lr_raw_files)

Reading files: 100%|██████████| 120/120 [00:01<00:00, 84.95it/s] 
Creating Dataset... took 0.71 s. Size is 22.621 Mb


Reading files:  19%|██████▎                          | 23/120 [00:00<00:01, 78.03it/s]

Reading files:  27%|████████▊                        | 32/120 [00:00<00:01, 80.17it/s]

Reading files:  34%|███████████▎                     | 41/120 [00:00<00:00, 82.52it/s]

Reading files:  45%|██████████████▊                  | 54/120 [00:00<00:00, 97.39it/s]

Reading files:  53%|█████████████████▌               | 64/120 [00:00<00:00, 92.42it/s]

Reading files:  62%|████████████████████▎            | 74/120 [00:00<00:00, 88.92it/s]

Reading files:  72%|███████████████████████▉         | 87/120 [00:01<00:00, 94.61it/s]

Reading files:  82%|██████████████████████████▉      | 98/120 [00:01<00:00, 96.80it/s]

Reading files:  90%|████████████████████████████▊   | 108/120 [00:01<00:00, 97.30it/s]

Reading files:  99%|███████████████████████████████▋| 119/120 [00:01<00:00, 90.42it/s]

Reading files: 100%|████████████████████████████████| 120/120 [00:05<00:00, 21.83it/s]


Creating Dataset... 

took 0.72 s. Size is 22.621 Mb


In [None]:
print(raw_fibre_table)

<xarray.Dataset> Size: 24MB
Dimensions:    (APS_ID: 1008, filename: 120)
Coordinates:
  * APS_ID     (APS_ID) int16 2kB 0 1 2 3 4 5 ... 1002 1003 1004 1005 1006 1007
  * filename   (filename) object 960B 'r1003318' 'r1003313' ... 'r1004146'
    RUN        (filename) int64 960B 1003318 1003313 1003314 ... 1004152 1004146
    CAMERA     (filename) <U4 2kB 'BLUE' 'RED' 'BLUE' ... 'RED' 'BLUE' 'BLUE'
    MJD        (filename) float64 960B 5.781e+04 5.781e+04 ... 5.803e+04
    NIGHT      (filename) <U8 4kB '20170224' '20170224' ... '20170930'
    OBID       (filename) int64 960B 3802 3802 3802 3802 ... 3936 3936 3936 3936
Data variables: (12/36)
    CNAME      (filename, APS_ID) object 968kB b'WVE_10011233+0259032' ... nan
    FIBRERA    (filename, APS_ID) float64 968kB 150.3 nan nan ... nan nan nan
    FIBREDEC   (filename, APS_ID) float64 968kB 2.984 nan nan ... nan nan nan
    XPOSITION  (filename, APS_ID) float32 484kB -29.01 nan nan ... nan nan nan
    YPOSITION  (filename, APS_ID) flo

### GUIDINFO

The GUIDINFO extension contains info about the guiding. Currently not sure of the best way to organise this data.

In [None]:
# Table.read(raw_hdus["GUIDINFO"])

### METINFO

The METINFO extension contains meteographical information. Currently not sure of the best way to organise this data.

In [None]:
# Table.read(raw_hdus["METINFO"], unit_parse_strict="silent")

## L1 files

These contain lower-level processed data products. There are `single` files, which contain info for a single exposure, and `stack` files, which contain the same info for stacked exposures.

We will read some simulated single files to show examples.

In [None]:
lr_l1_single_files = get_lr_l1_single_files(date="2017*")
print(len(lr_l1_single_files), "low-res L1 single files")

60 low-res L1 single files


In [None]:
l1_hdus = fits.open(lr_l1_single_files[0])

WEAVE L1 single files contain seven extensions:

In [None]:
print([hdu.name for hdu in l1_hdus])

['PRIMARY', 'RED_DATA', 'RED_IVAR', 'RED_DATA_NOSS', 'RED_IVAR_NOSS', 'RED_SENSFUNC', 'FIBTABLE']


### PRIMARY

The PRIMARY extension contains only a header with lots of information about the observation.

In [None]:
l1_hdus["PRIMARY"].header[:15]

SIMPLE  =                    T / file does conform to FITS standard             
BITPIX  =                    8 / number of bits per data pixel                  
NAXIS   =                    0 / number of data axes                            
EXTEND  =                    T / FITS dataset may contain extensions            
COMMENT   FITS (Flexible Image Transport System) format is defined in 'Astronomy
COMMENT   and Astrophysics', volume 376, page 359; bibcode: 2001A&A...376..359H 
COMMENT -------- Start of the CAMERA Packet -------                             
RUN     =              1003317                                                  
IRAFNAME= 'r1003317'           / redir r2840376.fit > r1003317                  
DETECTOR= 'WVRED   '           / Selected by inference                          
CCDSPEED= 'SLOW    '                                                            
CCDXBIN =                    1                                                  
CCDYBIN =                   

In [None]:
l1_primary_header = read_primary_header(lr_l1_single_files)

Reading files: 100%|██████████| 60/60 [00:00<00:00, 83.35it/s]
Creating Dataset... took 1.14 s. Size is 0.379 Mb


Reading files:  42%|██████████████▏                   | 25/60 [00:00<00:00, 93.97it/s]

Reading files:  58%|███████████████████▊              | 35/60 [00:00<00:00, 87.18it/s]

Reading files:  78%|██████████████████████████▋       | 47/60 [00:00<00:00, 67.64it/s]

Reading files:  95%|████████████████████████████████▎ | 57/60 [00:00<00:00, 70.10it/s]

Reading files: 100%|██████████████████████████████████| 60/60 [00:00<00:00, 73.42it/s]


Creating Dataset... 

took 1.01 s. Size is 0.379 Mb


In [None]:
print(l1_primary_header)

<xarray.Dataset> Size: 397kB
Dimensions:   (filename: 60)
Coordinates:
  * filename  (filename) object 480B 'single_1003318' ... 'single_1004150'
    RUN       (filename) int64 480B 1003318 1003317 1003320 ... 1004149 1004150
    CAMERA    (filename) <U4 960B 'BLUE' 'RED' 'BLUE' ... 'BLUE' 'RED' 'BLUE'
    OBID      (filename) int64 480B 3802 3802 3802 3802 ... 4407 3936 3936 3936
    MJD       (filename) float64 480B 5.781e+04 5.781e+04 ... 5.803e+04
    NIGHT     (filename) <U8 2kB '20170224' '20170224' ... '20170930' '20170930'
Data variables: (12/447)
    SIMPLE    (filename) bool 60B True True True True ... True True True True
    BITPIX    (filename) int64 480B 8 8 8 8 8 8 8 8 8 8 ... 8 8 8 8 8 8 8 8 8 8
    NAXIS     (filename) int64 480B 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
    EXTEND    (filename) bool 60B True True True True ... True True True True
    IRAFNAME  (filename) <U8 2kB 'r1003318' 'r1003317' ... 'r1004149' 'r1004150'
    DETECTOR  (filename) <U6 1kB 'WVBLUE'

### DATA and SENSFUNC

The RED/BLUE_DATA, RED/BLUE_IVAR, RED/BLUE_DATA_NOSS, RED/BLUE_IVAR_NOSS and RED_BLUE_SENSFUNC extensions contain the reduced binned spectra and their inverse variance, with and without sky subtraction, plus the sensitivity function. Implementation TBD, but I think it makes sense for all of these to be stored in a single Dataset.

In [None]:
# |export


def _l1_data_reader(fn):
    """Read the data from a WEAVE L1 FITS file as a Dataset."""
    hdus = fits.open(fn)
    camera = hdus["PRIMARY"].header["CAMERA"].replace("WEAVE", "")
    band = camera[0]
    hdr = hdus[f"{camera}_DATA"].header
    increment, zeropoint, size_wl, size_nspec = (
        hdr["CD1_1"],
        hdr["CRVAL1"],
        hdr["NAXIS1"],
        hdr["NAXIS2"],
    )
    wl = np.arange(0, size_wl) * increment + zeropoint
    nspec = np.arange(1, size_nspec + 1)
    coords = {"NSPEC": nspec, f"LAMBDA_{band}": wl}
    dims = list(coords.keys())
    arrays = {}
    for ext in ["DATA", "IVAR", "DATA_NOSS", "IVAR_NOSS", "SENSFUNC"]:
        name = f"{camera}_{ext}"
        data = hdus[name].data
        unit = hdus[name].header["BUNIT"]
        name = name.replace("DATA", "FLUX")
        arrays[name] = xr.Variable(dims, data, attrs={"unit": str(unit)})
    return xr.Dataset(arrays, coords)


read_l1_data = to_dataset(_l1_data_reader)

In [None]:
# |hide
show_doc(read_l1_data, name="read_l1_data", title_level=4)

---

[source](https://github.com/bamford/qagmire/blob/main/qagmire/data.py#L375){target="_blank" style="float:right; font-size:smaller"}

#### read_l1_data

>      read_l1_data (fn)

Read the data from a WEAVE L1 FITS file as a Dataset.

In [None]:
l1_data = read_l1_data(lr_l1_single_files)

Locating and converting where necessary: 100%|██████████| 60/60 [00:21<00:00,  2.75it/s]
Reading netCDF files... took 3.47 s. Size is 27397.910 Mb


Locating and converting where necessary:   8%|▊        | 5/60 [00:01<00:13,  3.98it/s]

Locating and converting where necessary:  10%|▉        | 6/60 [00:01<00:11,  4.72it/s]

Locating and converting where necessary:  15%|█▎       | 9/60 [00:02<00:09,  5.19it/s]

Locating and converting where necessary:  17%|█▎      | 10/60 [00:02<00:11,  4.33it/s]

Locating and converting where necessary:  18%|█▍      | 11/60 [00:02<00:10,  4.87it/s]

Locating and converting where necessary:  22%|█▋      | 13/60 [00:02<00:07,  6.24it/s]

Locating and converting where necessary:  23%|█▊      | 14/60 [00:03<00:09,  4.73it/s]

Locating and converting where necessary:  25%|██      | 15/60 [00:03<00:08,  5.37it/s]

Locating and converting where necessary:  27%|██▏     | 16/60 [00:03<00:08,  5.10it/s]

Locating and converting where necessary:  28%|██▎     | 17/60 [00:03<00:07,  5.46it/s]

Locating and converting where necessary:  32%|██▌     | 19/60 [00:04<00:11,  3.50it/s]

Locating and converting where necessary:  33%|██▋     | 20/60 [00:04<00:11,  3.49it/s]

Locating and converting where necessary:  35%|██▊     | 21/60 [00:05<00:11,  3.34it/s]

Locating and converting where necessary:  38%|███     | 23/60 [00:05<00:07,  5.04it/s]

Locating and converting where necessary:  40%|███▏    | 24/60 [00:06<00:17,  2.03it/s]

Locating and converting where necessary:  42%|███▎    | 25/60 [00:07<00:14,  2.40it/s]

Locating and converting where necessary:  45%|███▌    | 27/60 [00:07<00:12,  2.55it/s]

Locating and converting where necessary:  47%|███▋    | 28/60 [00:08<00:13,  2.38it/s]

Locating and converting where necessary:  48%|███▊    | 29/60 [00:08<00:12,  2.42it/s]

Locating and converting where necessary:  50%|████    | 30/60 [00:09<00:12,  2.48it/s]

Locating and converting where necessary:  52%|████▏   | 31/60 [00:09<00:09,  3.11it/s]

Locating and converting where necessary:  57%|████▌   | 34/60 [00:10<00:09,  2.74it/s]

Locating and converting where necessary:  58%|████▋   | 35/60 [00:10<00:09,  2.72it/s]

Locating and converting where necessary:  65%|█████▏  | 39/60 [00:10<00:03,  5.35it/s]

Locating and converting where necessary:  68%|█████▍  | 41/60 [00:11<00:03,  6.14it/s]

Locating and converting where necessary:  72%|█████▋  | 43/60 [00:11<00:03,  5.25it/s]

Locating and converting where necessary:  73%|█████▊  | 44/60 [00:11<00:02,  5.48it/s]

Locating and converting where necessary:  77%|██████▏ | 46/60 [00:11<00:01,  7.10it/s]

Locating and converting where necessary:  82%|██████▌ | 49/60 [00:11<00:01,  9.14it/s]

Locating and converting where necessary:  85%|██████▊ | 51/60 [00:12<00:01,  5.64it/s]

Locating and converting where necessary:  87%|██████▉ | 52/60 [00:12<00:01,  5.66it/s]

Locating and converting where necessary:  92%|███████▎| 55/60 [00:13<00:00,  7.30it/s]

Locating and converting where necessary:  95%|███████▌| 57/60 [00:13<00:00,  8.20it/s]

Locating and converting where necessary: 100%|████████| 60/60 [00:13<00:00, 10.59it/s]

Locating and converting where necessary: 100%|████████| 60/60 [00:13<00:00,  4.47it/s]


Reading netCDF files... 

took 3.22 s. Size is 27397.910 Mb


In [None]:
print(l1_data)

<xarray.Dataset> Size: 29GB
Dimensions:         (NSPEC: 960, LAMBDA_B: 9649, filename: 60, LAMBDA_R: 15289)
Coordinates:
  * NSPEC           (NSPEC) int64 8kB 1 2 3 4 5 6 7 ... 955 956 957 958 959 960
  * LAMBDA_B        (LAMBDA_B) float64 77kB 3.676e+03 3.676e+03 ... 6.088e+03
  * filename        (filename) <U14 3kB 'single_1003322' ... 'single_1004149'
    RUN             (filename) int64 480B dask.array<chunksize=(1,), meta=np.ndarray>
    CAMERA          (filename) <U4 960B dask.array<chunksize=(1,), meta=np.ndarray>
    MJD             (filename) float64 480B dask.array<chunksize=(1,), meta=np.ndarray>
    NIGHT           (filename) <U8 2kB dask.array<chunksize=(1,), meta=np.ndarray>
    OBID            (filename) int64 480B dask.array<chunksize=(1,), meta=np.ndarray>
  * LAMBDA_R        (LAMBDA_R) float64 122kB 5.772e+03 5.772e+03 ... 9.594e+03
Data variables:
    BLUE_FLUX       (filename, NSPEC, LAMBDA_B) float32 2GB dask.array<chunksize=(1, 960, 9649), meta=np.ndarray>
    BLU

It is a good idea to close a `Dataset` when you are finished with it, as otherwise other processes may not be able to access the same underlying files.

In [None]:
l1_data.close()

### FIBTABLE

The FIBTABLE extension contains information about the fibre allocations, plus some basic measurements.

In [None]:
# Table.read(l1_hdus["FIBTABLE"])

In [None]:
l1_fibre_table = read_fibre_table(lr_l1_single_files)

Reading files: 100%|██████████| 60/60 [00:01<00:00, 53.93it/s]
Creating Dataset... took 0.53 s. Size is 18.850 Mb


Reading files:  28%|█████████▋                        | 17/60 [00:00<00:00, 44.43it/s]

Reading files:  42%|██████████████▏                   | 25/60 [00:00<00:00, 51.64it/s]

Reading files:  55%|██████████████████▋               | 33/60 [00:00<00:00, 57.72it/s]

Reading files:  70%|███████████████████████▊          | 42/60 [00:00<00:00, 66.76it/s]

Reading files:  85%|████████████████████████████▉     | 51/60 [00:00<00:00, 67.85it/s]

Reading files:  98%|█████████████████████████████████▍| 59/60 [00:01<00:00, 70.94it/s]

Reading files: 100%|██████████████████████████████████| 60/60 [00:01<00:00, 58.13it/s]


Creating Dataset... 

took 0.58 s. Size is 18.850 Mb


In [None]:
print(l1_fibre_table)

<xarray.Dataset> Size: 20MB
Dimensions:       (APS_ID: 1004, filename: 60)
Coordinates:
  * APS_ID        (APS_ID) int16 2kB 1 2 3 4 5 6 ... 1003 1004 1005 1006 1007
  * filename      (filename) object 480B 'single_1003318' ... 'single_1004150'
    RUN           (filename) int64 480B 1003318 1003317 ... 1004149 1004150
    CAMERA        (filename) <U4 960B 'BLUE' 'RED' 'RED' ... 'RED' 'RED' 'BLUE'
    MJD           (filename) float64 480B 5.781e+04 5.781e+04 ... 5.803e+04
    NIGHT         (filename) <U8 2kB '20170224' '20170224' ... '20170930'
    OBID          (filename) int64 480B 3802 3802 3802 3802 ... 4407 3936 3936
Data variables: (12/59)
    NSPEC         (filename, APS_ID) float32 241kB nan nan nan ... nan nan nan
    CNAME         (filename, APS_ID) object 482kB nan nan nan ... nan nan nan
    FIBRERA       (filename, APS_ID) float64 482kB nan nan nan ... nan nan nan
    FIBREDEC      (filename, APS_ID) float64 482kB nan nan nan ... nan nan nan
    XPOSITION     (filename, AP

In [None]:
l1_fibre_table_nspec = read_fibre_table_nspec(lr_l1_single_files)

Reading files: 100%|██████████| 60/60 [00:00<00:00, 65.14it/s]
Creating Dataset... took 0.41 s. Size is 18.024 Mb


Reading files:  28%|█████████▋                        | 17/60 [00:00<00:01, 37.31it/s]

Reading files:  42%|██████████████▏                   | 25/60 [00:00<00:00, 47.69it/s]

Reading files:  55%|██████████████████▋               | 33/60 [00:00<00:00, 54.98it/s]

Reading files:  68%|███████████████████████▏          | 41/60 [00:00<00:00, 61.91it/s]

Reading files:  82%|███████████████████████████▊      | 49/60 [00:01<00:00, 66.60it/s]

Reading files:  95%|████████████████████████████████▎ | 57/60 [00:01<00:00, 69.17it/s]

Reading files: 100%|██████████████████████████████████| 60/60 [00:01<00:00, 51.79it/s]


Creating Dataset... 

took 0.64 s. Size is 18.024 Mb


In [None]:
print(l1_fibre_table_nspec)

<xarray.Dataset> Size: 19MB
Dimensions:       (NSPEC: 960, filename: 60)
Coordinates:
  * NSPEC         (NSPEC) int16 2kB 1 2 3 4 5 6 7 ... 955 956 957 958 959 960
  * filename      (filename) object 480B 'single_1003317' ... 'single_1004150'
    RUN           (filename) int64 480B 1003317 1003319 ... 1004149 1004150
    CAMERA        (filename) <U4 960B 'RED' 'RED' 'BLUE' ... 'BLUE' 'RED' 'BLUE'
    MJD           (filename) float64 480B 5.781e+04 5.781e+04 ... 5.803e+04
    NIGHT         (filename) <U8 2kB '20170224' '20170224' ... '20170930'
    OBID          (filename) int64 480B 3802 3802 3802 3802 ... 3936 3936 3936
Data variables: (12/59)
    CNAME         (filename, NSPEC) object 461kB b'WVE_10005104+0244131' ... nan
    FIBRERA       (filename, NSPEC) float64 461kB 150.2 150.2 150.4 ... nan nan
    FIBREDEC      (filename, NSPEC) float64 461kB 2.737 2.653 2.6 ... nan nan
    XPOSITION     (filename, NSPEC) float32 230kB -19.73 -18.88 ... nan nan
    YPOSITION     (filename, NSP

## L2 files

These contain higher-level processed data products. There are `single` files, which contain meaurements on a single exposure, and `stack` files, which contain the same measurements on stacked exposures.

We will read some simulated stack files to show examples.

In [None]:
lr_l2_stack_files = get_lr_l2_stack_files(date="2017*")
print(len(lr_l2_stack_files), "low-res L2 stack files")

6 low-res L2 stack files


In [None]:
l2_hdus = fits.open(lr_l2_stack_files[0])

WEAVE L2 stack files contain six extensions:

In [None]:
print([hdu.name for hdu in l2_hdus])

['PRIMARY', 'CLASS_TABLE', 'STAR_TABLE', 'GALAXY_TABLE', 'CLASS_SPEC', 'STAR_SPEC', 'GALAXY_SPEC']


### PRIMARY

The PRIMARY extension contains only a header with some basic information.

In [None]:
l2_hdus["PRIMARY"].header

SIMPLE  =                    T / conforms to FITS standard                      
BITPIX  =                    8 / array data type                                
NAXIS   =                    0 / number of array dimensions                     
EXTEND  =                    T                                                  
L1_REF_0= 'stack_1003318.fit'  / L1 reference file                              
L1_REF_1= 'stack_1003317.fit'  / L1 reference file                              
L1_REF_2= '' / L1 reference file                                                
DATE-OBS= '20170224'           / L1: OBS-DATE                                   
OBSMODE = 'MOS     '           / L1: OBSMODE                                    
RES-OBS = 'LR      '           / L2: RES-DATE                                   
OBID    = '3802    '           / L1: OBID                                       
CHECKSUM= '97WQA7WO97WOA7WO'   / HDU checksum updated 2022-02-08T02:11:32       
DATASUM = '0       '        

In [None]:
l2_primary_header = read_primary_header(lr_l2_stack_files)

Reading files: 100%|██████████| 6/6 [00:00<00:00, 102.97it/s]
Creating Dataset... took 0.01 s. Size is 0.002 Mb


Creating Dataset... took 0.01 s. Size is 0.002 Mb


In [None]:
print(l2_primary_header)

<xarray.Dataset> Size: 2kB
Dimensions:   (filename: 6)
Coordinates:
  * filename  (filename) object 48B 'stack_1003354__stack_1003353_APS' ... 's...
    OBID      (filename) <U4 96B '3653' '3756' '3803' '3900' '3806' '3802'
Data variables:
    SIMPLE    (filename) bool 6B True True True True True True
    BITPIX    (filename) int64 48B 8 8 8 8 8 8
    NAXIS     (filename) int64 48B 0 0 0 0 0 0
    EXTEND    (filename) bool 6B True True True True True True
    L1_REF_0  (filename) <U17 408B 'stack_1003354.fit' ... 'stack_1003318.fit'
    L1_REF_1  (filename) <U17 408B 'stack_1003353.fit' ... 'stack_1003317.fit'
    L1_REF_2  (filename) <U1 24B '' '' '' '' '' ''
    DATE-OBS  (filename) <U8 192B '20170225' '20170227' ... '20170224'
    OBSMODE   (filename) <U3 72B 'MOS' 'MOS' 'MOS' 'MOS' 'MOS' 'MOS'
    RES-OBS   (filename) <U2 48B 'LR' 'LR' 'LR' 'LR' 'LR' 'LR'
    CHECKSUM  (filename) <U16 384B '97WNC7TL97TLC7TL' ... '97WQA7WO97WOA7WO'
    DATASUM   (filename) <U1 24B '0' '0' '0' '0' '0

### CLASS_TABLE

The CLASS_TABLE extension contains information from matching various templates to the spectra. The redshift and class of the best fitting template are given, as well as the full cross-correlation results of each template as a function of redshift.

In [None]:
# Table.read(l2_hdus["CLASS_TABLE"]) # contains multidimensional columns, so not shown

In [None]:
# |export


def _class_table_reader(fn):
    """Read the CLASS_TABLE from a WEAVE L2 FITS file as a Dataset.

    All quantities are indexed by the `APS_ID` of the fibre.
    Chi-square values `CZZ_CHI2_*` for each template are further indexed by redshift `CZZ_*`.
    Coefficients `COEFF` and indexed by integers `I_COEFF`.
    """
    cols = _read_fits_columns(fn, "CLASS_TABLE")
    if not cols:
        return None
    coords = dict(APS_ID=cols.pop("APS_ID"))
    # convert CZZ columns to coordinates
    for c in list(cols):
        if c.startswith("CZZ") and "CHI2" not in c:
            czz_all = cols.pop(c)
            czz = czz_all[0]
            assert (czz == czz_all).all()
            coords[c] = czz
    for c in cols:
        dims = ["APS_ID"]
        if c.startswith("CZZ"):
            dims += [c.replace("_CHI2", "")]
        elif c == "COEFF":
            dims += ["I_COEFF"]
        cols[c] = xr.Variable(dims, cols[c], attrs={"unit": str(cols[c].unit)})
    return xr.Dataset(cols, coords)


read_class_table = to_dataset(_class_table_reader)

In [None]:
# |hide
show_doc(read_class_table, name="read_class_table", title_level=4)

---

[source](https://github.com/bamford/qagmire/blob/main/qagmire/data.py#L404){target="_blank" style="float:right; font-size:smaller"}

#### read_class_table

>      read_class_table (fn)

Read the CLASS_TABLE from a WEAVE L2 FITS file as a Dataset.

All quantities are indexed by the `APS_ID` of the fibre.
Chi-square values `CZZ_CHI2_*` for each template are further indexed by redshift `CZZ_*`.
Coefficients `COEFF` and indexed by integers `I_COEFF`.

In [None]:
class_table = read_class_table(lr_l2_stack_files)

Locating and converting where necessary: 100%|██████████| 6/6 [00:01<00:00,  3.19it/s]
Reading netCDF files... took 1.19 s. Size is 178.858 Mb


Locating and converting where necessary:  67%|██████▋   | 4/6 [00:01<00:00,  3.75it/s]

Locating and converting where necessary: 100%|██████████| 6/6 [00:01<00:00,  4.29it/s]


Reading netCDF files... 

took 1.14 s. Size is 178.858 Mb


In [None]:
print(class_table)

<xarray.Dataset> Size: 188MB
Dimensions:           (APS_ID: 995, CZZ_GALAXY: 1446, CZZ_QSO: 1648,
                       CZZ_STAR_A: 101, CZZ_STAR_B: 101, CZZ_STAR_CV: 101,
                       CZZ_STAR_F: 101, CZZ_STAR_G: 101, CZZ_STAR_K: 101,
                       CZZ_STAR_M: 101, CZZ_STAR_WD: 101, filename: 6,
                       I_COEFF: 10)
Coordinates: (12/13)
  * APS_ID            (APS_ID) int64 8kB 1 2 3 4 5 ... 1003 1004 1005 1006 1007
  * CZZ_GALAXY        (CZZ_GALAXY) float64 12kB -0.005 -0.004312 ... 1.698 1.7
  * CZZ_QSO           (CZZ_QSO) float64 13kB 0.05 0.05121 ... 5.985 5.993
  * CZZ_STAR_A        (CZZ_STAR_A) float64 808B -0.002 -0.00196 ... 0.002
  * CZZ_STAR_B        (CZZ_STAR_B) float64 808B -0.002 -0.00196 ... 0.002
  * CZZ_STAR_CV       (CZZ_STAR_CV) float64 808B -0.002 -0.00196 ... 0.002
    ...                ...
  * CZZ_STAR_G        (CZZ_STAR_G) float64 808B -0.002 -0.00196 ... 0.002
  * CZZ_STAR_K        (CZZ_STAR_K) float64 808B -0.002 -0.00196 ... 

In [None]:
class_table.close()

### STAR_TABLE

The STAR_TABLE extension contains measurements of stellar parameters.

In [None]:
# Table.read(l2_hdus["STAR_TABLE"])  # contains multidimensional columns, so not shown

In [None]:
# |export


def _star_table_reader(fn):
    """Read the STAR_TABLE from a WEAVE L2 FITS file as a Dataset.

    All quantities are indexed by the `APS_ID` of the fibre.
    The covariance matrix `COVAR` is additionally indexed by `I_COVAR`, `J_COVAR`.
    The elements `ELEM` and `ELEM_ERR` are additionally indexed by `I_ELEM`.
    """
    cols = _read_fits_columns(fn, "STAR_TABLE")
    if not cols:
        return None
    coords = dict(APS_ID=cols.pop("APS_ID"))
    coords["I_COVAR"] = coords["J_COVAR"] = ["TEFF", "LOGG", "FEH", "ALPHA", "MICRO"]
    for c in cols:
        dims = ["APS_ID"]
        if c == "COVAR":
            dims += ["I_COVAR", "J_COVAR"]
        elif "ELEM" in c:
            dims += ["I_ELEM"]
        cols[c] = xr.Variable(dims, cols[c], attrs={"unit": str(cols[c].unit)})
    return xr.Dataset(cols, coords)


read_star_table = to_dataset(_star_table_reader)

In [None]:
# |hide
show_doc(read_star_table, name="read_star_table", title_level=4)

---

[source](https://github.com/bamford/qagmire/blob/main/qagmire/data.py#L435){target="_blank" style="float:right; font-size:smaller"}

#### read_star_table

>      read_star_table (fn)

Read the STAR_TABLE from a WEAVE L2 FITS file as a Dataset.

All quantities are indexed by the `APS_ID` of the fibre.
The covariance matrix `COVAR` is additionally indexed by `I_COVAR`, `J_COVAR`.
The elements `ELEM` and `ELEM_ERR` are additionally indexed by `I_ELEM`.

In [None]:
star_table = read_star_table(lr_l2_stack_files)

Locating and converting where necessary: 100%|██████████| 6/6 [00:00<00:00,  6.69it/s]
Reading netCDF files... took 1.02 s. Size is 0.065 Mb



Reading netCDF files... 

took 1.08 s. Size is 0.065 Mb


In [None]:
print(star_table)

<xarray.Dataset> Size: 69kB
Dimensions:        (APS_ID: 24, I_COVAR: 5, J_COVAR: 5, filename: 6, I_ELEM: 2)
Coordinates:
  * APS_ID         (APS_ID) int64 192B 194 222 283 304 329 ... 922 946 948 992
  * I_COVAR        (I_COVAR) <U5 100B 'TEFF' 'LOGG' 'FEH' 'ALPHA' 'MICRO'
  * J_COVAR        (J_COVAR) <U5 100B 'TEFF' 'LOGG' 'FEH' 'ALPHA' 'MICRO'
  * filename       (filename) <U32 768B 'stack_1003426__stack_1003425_APS' .....
    OBID           (filename) <U4 96B dask.array<chunksize=(1,), meta=np.ndarray>
Dimensions without coordinates: I_ELEM
Data variables: (12/33)
    TARGID         (filename, APS_ID) object 1kB dask.array<chunksize=(1, 24), meta=np.ndarray>
    CNAME          (filename, APS_ID) object 1kB dask.array<chunksize=(1, 24), meta=np.ndarray>
    VRAD           (filename, APS_ID) float64 1kB dask.array<chunksize=(1, 24), meta=np.ndarray>
    VRAD_ERR       (filename, APS_ID) float64 1kB dask.array<chunksize=(1, 24), meta=np.ndarray>
    SKEWNESS_RVS   (filename, APS_ID) fl

In [None]:
star_table.close()

### GALAXY_TABLE

The GALAXY_TABLE extension contains measurements of galaxy parameters, including Hubble-flow corrected redshifts, stellar kinematics, line fits and indices.

In [None]:
# Table.read(l2_hdus["GALAXY_TABLE"], unit_parse_strict="silent")  # contains multidimensional columns, so not shown

In [None]:
# |export


def _not_line_col(c):
    """Identify columns that do not contain line measurements."""
    c = c.replace("ERR_", "")
    for n in ["EBMV0", "EBMV1", "FLUX", "AMPL", "Z", "SIGMA", "AON", "FWHM"]:
        if c.startswith(n + "_"):
            return False
    return True


def _process_line_quantities(cols, lines):
    """Process line quantities.

    Quantities with multiple elements are split into separate columns.

    The supplied `cols` dictionary is modified in-place.
    """
    line_quantities = []
    for c in list(cols):
        if c.endswith(lines[0]):
            qty = c.replace("_" + lines[0], "")
            ndim = cols[c].ndim
            if ndim == 1:
                line_quantities.append(qty)
            elif ndim == 2:
                nq = cols[c].shape[1]
                for i in range(nq):
                    line_quantities.append(f"{qty}{i}")
                for line in lines:
                    oldcol = cols.pop(f"{qty}_{line}")
                    for i in range(nq):
                        cols[f"{qty}{i}_{line}"] = oldcol[:, i]
    return line_quantities


def _galaxy_table_reader(fn):
    """Read the GALAXY_TABLE from a WEAVE L2 FITS file as a Dataset.

    All quantities are indexed by the `APS_ID` of the fibre.
    The line measurements are additionally indexed by the measurement quantity `QTY`
    and the line name `LINE`.
    The index measurements are additionally indexed by the index name `INDEX`.
    """
    # TODO: add units where missing
    cols = _read_fits_columns(fn, "GALAXY_TABLE")
    if not cols:
        return None
    coords = dict(APS_ID=cols.pop("APS_ID"))
    coords["LINE"] = [c.replace("FLUX_", "") for c in cols if c.startswith("FLUX")]
    coords["QTY"] = _process_line_quantities(cols, coords["LINE"])
    coords["INDEX"] = [c for i, c in enumerate(cols) if (i > 100 and _not_line_col(c))]
    line_cols = [
        [cols.pop(f"{qty}_{line}") for line in coords["LINE"]] for qty in coords["QTY"]
    ]
    index_cols = [cols.pop(idx) for idx in coords["INDEX"]]
    out_cols = {}
    for i, c in enumerate(cols):
        dims = ["APS_ID"]
        out_cols[c] = xr.Variable(dims, cols[c], attrs={"unit": str(cols[c].unit)})
    out_cols["LINES"] = xr.Variable(["QTY", "LINE", "APS_ID"], line_cols)
    out_cols["INDICES"] = xr.Variable(["INDEX", "APS_ID"], index_cols)
    return xr.Dataset(out_cols, coords)


read_galaxy_table = to_dataset(_galaxy_table_reader)

In [None]:
# |hide
show_doc(read_galaxy_table, name="read_galaxy_table", title_level=4)

---

[source](https://github.com/bamford/qagmire/blob/main/qagmire/data.py#L494){target="_blank" style="float:right; font-size:smaller"}

#### read_galaxy_table

>      read_galaxy_table (fn)

Read the GALAXY_TABLE from a WEAVE L2 FITS file as a Dataset.

All quantities are indexed by the `APS_ID` of the fibre.
The line measurements are additionally indexed by the measurement quantity `QTY`
and the line name `LINE`.
The index measurements are additionally indexed by the index name `INDEX`.

In [None]:
galaxy_table = read_galaxy_table(lr_l2_stack_files)

Locating and converting where necessary: 100%|██████████| 6/6 [00:01<00:00,  4.63it/s]
Reading netCDF files... took 0.95 s. Size is 27.393 Mb


Locating and converting where necessary: 100%|██████████| 6/6 [00:01<00:00,  5.51it/s]

Locating and converting where necessary: 100%|██████████| 6/6 [00:01<00:00,  4.40it/s]


Reading netCDF files... 

took 1.30 s. Size is 27.393 Mb


In [None]:
print(galaxy_table)

<xarray.Dataset> Size: 29MB
Dimensions:         (APS_ID: 994, LINE: 22, QTY: 13, INDEX: 292, filename: 6)
Coordinates:
  * APS_ID          (APS_ID) int32 4kB 1 2 3 4 5 6 ... 1003 1004 1005 1006 1007
  * LINE            (LINE) <U15 1kB 'HeII_3203.15' ... '[ArIII]_7135.67'
  * QTY             (QTY) <U9 468B 'FLUX' 'AMPL' 'Z' ... 'ERR_EBMV0' 'ERR_EBMV1'
  * INDEX           (INDEX) <U16 19kB 'BL1719' 'ERR_BL1719' ... 'ERR_MgI2.28'
  * filename        (filename) <U32 768B 'stack_1003438__stack_1003437_APS' ....
    OBID            (filename) <U4 96B dask.array<chunksize=(1,), meta=np.ndarray>
Data variables: (12/26)
    TARGID          (filename, APS_ID) object 48kB dask.array<chunksize=(1, 994), meta=np.ndarray>
    CNAME           (filename, APS_ID) object 48kB dask.array<chunksize=(1, 994), meta=np.ndarray>
    ZCORR           (filename, APS_ID) float64 48kB dask.array<chunksize=(1, 994), meta=np.ndarray>
    V               (filename, APS_ID) float64 48kB dask.array<chunksize=(1, 994), 

In [None]:
galaxy_table.close()

### CLASS_SPEC

The CLASS_SPEC extension contains spectra and model fits, performed separately for the red and blue arms.

In [None]:
# Table.read(l2_hdus["CLASS_SPEC"], unit_parse_strict="silent")  # contains multidimensional columns, so not shown

In [None]:
# |export


def _class_spec_reader(fn):
    """Read the CLASS_SPEC from a WEAVE L2 FITS file as a Dataset.

    All quantities are indexed by the `APS_ID` of the fibre.
    Spectral quantities are additionally indexed by wavelength `LAMBDA_{B,R}`.
    """
    cols = _read_fits_columns(fn, "CLASS_SPEC", limit_precision=True)
    if not cols:
        return None
    coords = dict(APS_ID=cols.pop("APS_ID"))
    for c in list(cols):
        if c.startswith("LAMBDA"):
            band = c.split("_")[-1]
            wl_all = cols.pop(c)
            wl = wl_all[0]
            assert (wl == wl_all).all()
            coords[f"LAMBDA_{band}"] = wl
    for c in cols:
        dims = ["APS_ID"]
        if c.endswith("_B"):
            dims += ["LAMBDA_B"]
        elif c.endswith("_R"):
            dims += ["LAMBDA_R"]
        cols[c] = xr.Variable(dims, cols[c], attrs={"unit": str(cols[c].unit)})
    return xr.Dataset(cols, coords)


read_class_spec = to_dataset(_class_spec_reader)

In [None]:
# |hide
show_doc(read_class_spec, name="read_class_spec", title_level=4)

---

[source](https://github.com/bamford/qagmire/blob/main/qagmire/data.py#L526){target="_blank" style="float:right; font-size:smaller"}

#### read_class_spec

>      read_class_spec (fn)

Read the CLASS_SPEC from a WEAVE L2 FITS file as a Dataset.

All quantities are indexed by the `APS_ID` of the fibre.
Spectral quantities are additionally indexed by wavelength `LAMBDA_{B,R}`.

In [None]:
class_spec = read_class_spec(lr_l2_stack_files)

Locating and converting where necessary: 100%|██████████| 6/6 [00:07<00:00,  1.29s/it]
Reading netCDF files... took 0.46 s. Size is 1703.853 Mb


Locating and converting where necessary:  50%|█████     | 3/6 [00:06<00:05,  1.96s/it]

Locating and converting where necessary:  83%|████████▎ | 5/6 [00:06<00:00,  1.08it/s]

Locating and converting where necessary: 100%|██████████| 6/6 [00:07<00:00,  1.17it/s]

Locating and converting where necessary: 100%|██████████| 6/6 [00:07<00:00,  1.27s/it]


Reading netCDF files... 

took 0.45 s. Size is 1703.853 Mb


In [None]:
print(class_spec)

<xarray.Dataset> Size: 2GB
Dimensions:     (APS_ID: 995, LAMBDA_B: 9648, LAMBDA_R: 15288, filename: 6)
Coordinates:
  * APS_ID      (APS_ID) int64 8kB 1 2 3 4 5 6 ... 1002 1003 1004 1005 1006 1007
  * LAMBDA_B    (LAMBDA_B) float32 39kB 3.677e+03 3.677e+03 ... 6.089e+03
  * LAMBDA_R    (LAMBDA_R) float32 61kB 5.774e+03 5.774e+03 ... 9.596e+03
  * filename    (filename) <U32 768B 'stack_1003330__stack_1003329_APS' ... '...
    OBID        (filename) <U4 96B dask.array<chunksize=(1,), meta=np.ndarray>
Data variables:
    TARGID      (filename, APS_ID) object 48kB dask.array<chunksize=(1, 995), meta=np.ndarray>
    CNAME       (filename, APS_ID) object 48kB dask.array<chunksize=(1, 995), meta=np.ndarray>
    FLUX_RR_B   (filename, APS_ID, LAMBDA_B) float32 230MB dask.array<chunksize=(1, 995, 9648), meta=np.ndarray>
    IVAR_RR_B   (filename, APS_ID, LAMBDA_B) float32 230MB dask.array<chunksize=(1, 995, 9648), meta=np.ndarray>
    MODEL_RR_B  (filename, APS_ID, LAMBDA_B) float32 230MB dask

In [None]:
class_spec.close()

### STAR_SPEC

The STAR_SPEC extension contains spectra and model fits for stellar measurements.

In [None]:
# Table.read(l2_hdus["STAR_SPEC"], unit_parse_strict="silent")

In [None]:
# |export


def _star_spec_reader(fn):
    """Read the STAR_SPEC from a WEAVE L2 FITS file as a Dataset.

    All quantities are indexed by the `APS_ID` of the fibre.
    Spectral quantities are additionally indexed by wavelength bin `LAMBIN_{R,B,C}`,
    which does *not* correspond to the same wavelength for each spectrum.
    """
    cols = _read_fits_columns(fn, "STAR_SPEC", limit_precision=True)
    if not cols:
        return None
    coords = dict(APS_ID=cols.pop("APS_ID"))
    for c in cols:
        dims = ["APS_ID"]
        if c.endswith("_B"):
            dims += ["LAMBIN_B"]
        elif c.endswith("_R"):
            dims += ["LAMBIN_R"]
        elif c.endswith("_C"):
            dims += ["LAMBIN_C"]
        cols[c] = xr.Variable(dims, cols[c], attrs={"unit": str(cols[c].unit)})
    return xr.Dataset(cols, coords)


read_star_spec = to_dataset(_star_spec_reader)

In [None]:
# |hide
show_doc(read_star_spec, name="read_star_spec", title_level=4)

---

[source](https://github.com/bamford/qagmire/blob/main/qagmire/data.py#L556){target="_blank" style="float:right; font-size:smaller"}

#### read_star_spec

>      read_star_spec (fn)

Read the STAR_SPEC from a WEAVE L2 FITS file as a Dataset.

All quantities are indexed by the `APS_ID` of the fibre.
Spectral quantities are additionally indexed by wavelength bin `LAMBIN_{R,B,C}`,
which does *not* correspond to the same wavelength for each spectrum.

In [None]:
star_spec = read_star_spec(lr_l2_stack_files)

Locating and converting where necessary: 100%|██████████| 6/6 [00:00<00:00,  7.85it/s]
Reading netCDF files... took 0.92 s. Size is 106.808 Mb


Locating and converting where necessary:  83%|████████▎ | 5/6 [00:00<00:00, 10.38it/s]

Locating and converting where necessary: 100%|██████████| 6/6 [00:00<00:00,  9.64it/s]


Reading netCDF files... 

took 0.73 s. Size is 106.808 Mb


In [None]:
print(star_spec)

<xarray.Dataset> Size: 112MB
Dimensions:       (APS_ID: 24, filename: 6, LAMBIN_B: 9648, LAMBIN_R: 15288,
                   LAMBIN_C: 23672)
Coordinates:
  * APS_ID        (APS_ID) int64 192B 194 222 283 304 329 ... 922 946 948 992
  * filename      (filename) <U32 768B 'stack_1003426__stack_1003425_APS' ......
    OBID          (filename) <U4 96B dask.array<chunksize=(1,), meta=np.ndarray>
Dimensions without coordinates: LAMBIN_B, LAMBIN_R, LAMBIN_C
Data variables: (12/14)
    TARGID        (filename, APS_ID) object 1kB dask.array<chunksize=(1, 24), meta=np.ndarray>
    CNAME         (filename, APS_ID) object 1kB dask.array<chunksize=(1, 24), meta=np.ndarray>
    LAMBDA_RVS_B  (filename, APS_ID, LAMBIN_B) float32 6MB dask.array<chunksize=(1, 24, 9648), meta=np.ndarray>
    FLUX_RVS_B    (filename, APS_ID, LAMBIN_B) float32 6MB dask.array<chunksize=(1, 24, 9648), meta=np.ndarray>
    ERROR_RVS_B   (filename, APS_ID, LAMBIN_B) float32 6MB dask.array<chunksize=(1, 24, 9648), meta=np.nda

In [None]:
star_spec.close()

### GALAXY_SPEC

The GALAXY_SPEC extension contains log-wavelength-binned spectra and model fits by PPXF and GANDALF.

In [None]:
# Table.read(l2_hdus["GALAXY_SPEC"], unit_parse_strict="silent")

In [None]:
# |export


def _galaxy_spec_reader(fn):
    """Read the GALAXY_SPEC from a WEAVE L2 FITS file as a Dataset.

    All quantities are indexed by the `APS_ID` of the fibre.
    Spectral quantities are additionally indexed by log-wavelength bin `LOGLAMBIN`,
    which does *not* correspond to the same wavelength for each spectrum.
    """
    cols = _read_fits_columns(fn, "GALAXY_SPEC", limit_precision=True)
    if not cols:
        return None
    coords = dict(APS_ID=cols.pop("APS_ID"))
    for c in cols:
        dims = ["APS_ID"]
        if c.endswith("_PPXF") or c.endswith("_GAND"):
            dims += ["LOGLAMBIN"]
        cols[c] = xr.Variable(dims, cols[c], attrs={"unit": str(cols[c].unit)})
    return xr.Dataset(cols, coords)


read_galaxy_spec = to_dataset(_galaxy_spec_reader)

In [None]:
# |hide
show_doc(read_galaxy_spec, name="read_galaxy_spec", title_level=4)

---

[source](https://github.com/bamford/qagmire/blob/main/qagmire/data.py#L582){target="_blank" style="float:right; font-size:smaller"}

#### read_galaxy_spec

>      read_galaxy_spec (fn)

Read the GALAXY_SPEC from a WEAVE L2 FITS file as a Dataset.

All quantities are indexed by the `APS_ID` of the fibre.
Spectral quantities are additionally indexed by log-wavelength bin `LOGLAMBIN`,
which does *not* correspond to the same wavelength for each spectrum.

In [None]:
galaxy_spec = read_galaxy_spec(lr_l2_stack_files)

Locating and converting where necessary: 100%|██████████| 6/6 [00:21<00:00,  3.62s/it]
Reading netCDF files... took 0.85 s. Size is 10232.270 Mb


Locating and converting where necessary:  50%|█████     | 3/6 [00:24<00:18,  6.07s/it]

Locating and converting where necessary:  67%|██████▋   | 4/6 [00:26<00:09,  4.71s/it]

Locating and converting where necessary:  83%|████████▎ | 5/6 [00:28<00:03,  3.39s/it]

Locating and converting where necessary: 100%|██████████| 6/6 [00:29<00:00,  2.58s/it]

Locating and converting where necessary: 100%|██████████| 6/6 [00:29<00:00,  4.84s/it]


Reading netCDF files... 

took 0.74 s. Size is 10232.270 Mb


In [None]:
print(galaxy_spec)

<xarray.Dataset> Size: 11GB
Dimensions:           (APS_ID: 994, filename: 6, LOGLAMBIN: 23671)
Coordinates:
  * APS_ID            (APS_ID) int32 4kB 1 2 3 4 5 ... 1003 1004 1005 1006 1007
  * filename          (filename) <U32 768B 'stack_1003330__stack_1003329_APS'...
    OBID              (filename) <U4 96B dask.array<chunksize=(1,), meta=np.ndarray>
Dimensions without coordinates: LOGLAMBIN
Data variables: (12/15)
    TARGID            (filename, APS_ID) object 48kB dask.array<chunksize=(1, 994), meta=np.ndarray>
    CNAME             (filename, APS_ID) object 48kB dask.array<chunksize=(1, 994), meta=np.ndarray>
    LOGLAM_PPXF       (filename, APS_ID, LOGLAMBIN) float32 565MB dask.array<chunksize=(1, 994, 23671), meta=np.ndarray>
    FLUX_PPXF         (filename, APS_ID, LOGLAMBIN) float32 565MB dask.array<chunksize=(1, 994, 23671), meta=np.ndarray>
    ERROR_PPXF        (filename, APS_ID, LOGLAMBIN) float32 565MB dask.array<chunksize=(1, 994, 23671), meta=np.ndarray>
    MODEL_PPXF 

In [None]:
galaxy_spec.close()

In [16]:
# |hide
import nbdev

nbdev.nbdev_export()