In [None]:
%reload_ext autoreload
%autoreload 3 --print

import logging
from pca_analysis import xr_signal

from pca_analysis.definitions import PARAFAC2_TESTSET
from pca_analysis import xr_plotly
import plotly.io as pio
import xarray as xr
import darkdetect

# Notebook-level logger, configured to emit DEBUG messages and above.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Keep xarray reprs compact: do not expand the data or coords sections.
xr.set_options(display_expand_data=False, display_expand_coords=False)

# Use the dark plotly template whenever darkdetect reports a dark theme.
if darkdetect.isDark():
    pio.templates.default = "plotly_dark"

# Load the test set and immediately cut it down to a subset (every 5th
# wavelength label in 210-260, `mins` in 0-30) to speed up development.
ds = xr.load_dataset(PARAFAC2_TESTSET).sel(
    wavelength=slice(210, 260, 5),
    mins=slice(0, 30),
)
ds


## Smoothing

The criterion is that, with the default `find_peaks` parameters, no peaks are detected within the first 0.77 seconds. This can be achieved through Savitzky–Golay (`savgol`) smoothing.

In [None]:
from pca_analysis.preprocessing import smooth

# Preview the smoothing on a handful of samples: overlay the raw and the
# savgol-smoothed signal at wavelength 260 over `mins` 0-10, one facet per
# `id_rank`.
# NOTE: the smoothing is computed from the full `ds.raw_data` and then
# assigned onto the `id_rank` subset selected by `isel`.
(
    ds.isel(id_rank=slice(2, 6))
    .assign(
        smoothed=smooth.savgol_smooth(
            ds.raw_data,
            input_core_dims=[["mins"]],
            output_core_dims=[["mins"]],
            window_length=60,
            polyorder=2,
        )
    )
    .sel(wavelength=260, mins=slice(0, 10))
    .plotly.facet_plot_overlay(
        grouper="id_rank",
        var_keys=["raw_data", "smoothed"],
        col_wrap=2,
    )
)


## Sharpening

## Baseline Subtraction

To simplify tool development, we should first subtract the baseline from each sample. Whether there is a true baseline is questionable; however, the rise and fall roughly corresponds to the change in the concentration of methanol in the mobile phase, which potentially introduces background absorption. Either way, the data will be easier to work with once the baselines are zeroed.

In [None]:
from pca_analysis.preprocessing import bcorr


def correct_baselines_ds(
    ds: xr.Dataset,
    core_dim,
    *,
    data_key: str = "raw_data",
    baseline_key: str = "baselines",
    corrected_key: str = "data_corr",
    **kwargs,
):
    """
    Fit and subtract a baseline along `core_dim` for every slice of a signal.

    `bcorr.apply_snip` is applied to each 1D slice of `ds[data_key]` along
    `core_dim` (all other dimensions are looped over). The fitted baseline
    and the baseline-subtracted signal are added to the dataset as new
    variables.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset containing the variable named by `data_key`.
    core_dim : hashable
        Name of the dimension along which the baseline is fitted.
    data_key : str, default "raw_data"
        Name of the input signal variable.
    baseline_key : str, default "baselines"
        Name under which the fitted baseline is stored.
    corrected_key : str, default "data_corr"
        Name under which `signal - baseline` is stored.
    **kwargs
        Forwarded unchanged to `bcorr.apply_snip`. The three `*_key`
        parameters above are consumed here and are not forwarded.

    Returns
    -------
    xr.Dataset
        A new dataset with `baseline_key` and `corrected_key` added; the
        input dataset is not modified.

    Raises
    ------
    TypeError
        If `ds` is not an `xr.Dataset`.
    KeyError
        If `data_key` is not a variable of `ds`.
    """

    if not isinstance(ds, xr.Dataset):
        raise TypeError(
            f"ds must be an xarray.Dataset, got {type(ds).__name__}"
        )

    signal = ds[data_key]

    baselines = xr.apply_ufunc(
        bcorr.apply_snip,
        signal,
        kwargs=kwargs,
        input_core_dims=[[core_dim]],
        output_core_dims=[[core_dim]],
        # need vectorize to do the looping over the non-core dimensions
        vectorize=True,
    )

    return ds.assign(
        {
            baseline_key: baselines,
            corrected_key: signal - baselines,
        }
    )


# Add the fitted baselines and the baseline-corrected signal to the working
# dataset, fitting along `mins`; `max_half_window` is forwarded to the
# baseline routine. Then show the updated dataset.
ds = correct_baselines_ds(ds, core_dim="mins", max_half_window=30)
display(ds)


In [None]:
# Visual check of the baseline correction: for the first wavelength, overlay
# the raw signal, the fitted baseline and the corrected signal against
# `mins`, one facet per `id_rank`.
# The transpose must come before `isel`, since it names `wavelength`.
overlay_fig = (
    ds.transpose("id_rank", "wavelength", "mins")
    .isel({"wavelength": 0})
    .plotly.facet_plot_overlay(
        grouper="id_rank",
        var_keys=["raw_data", "baselines", "data_corr"],
        col_wrap=3,
        x_key="mins",
    )
)
overlay_fig
