In [2]:
from dataclasses import dataclass
from findiff import FinDiff
from hplc_py.definitions import PRECISION
from hplc_py.datasets import DataSets
from hplc_py.baseline_correction import baseline_correction
from sklearn.compose import ColumnTransformer
from hplc_py.map_signal import map_signal

import holoviews as hv
import hvplot
import numpy as np
import pandera as pa
import polars as pl

dsets = DataSets()
ringland = dsets.ringland.fetch().with_columns(
    pl.col("time").round(9), pl.col("signal").round(9)
)

%reload_ext autoreload
%autoreload 2

from sklearn.pipeline import Pipeline
import hplc_py.transformers as transformers

pl.Config.set_tbl_rows(5)

polars.config.Config

In [3]:
# Keep only the two columns the pipeline needs, in (time, signal) order.
input_data = ringland.select("time", "signal")
input_data

time,signal
f64,f64
0.005833,0.001952
0.0125,0.001825
0.019167,0.002004
…,…
26.9925,1.730151
26.999167,1.728989


# Test: Deconvolution Pipeline

A notebook demonstrating the function of the deconvolution pipeline.

In [4]:
# Signal preprocessing. The named steps run in order on the "signal" column:
# rounder -> first_diff_sym -> second_diff_sub -> savgol1 -> bcorr -> savgol2
# -> rounder2.
pipeline_steps = [
    ("rounder", transformers.Rounder()),
    ("first_diff_sym", transformers.FiniteDiffAdder(order=1, sign="+", k=2.2)),
    ("second_diff_sub", transformers.FiniteDiffAdder(order=2, sign="-", k=0.1)),
    ("savgol1", transformers.SavgolFilter(window_length=5, polyorder=2)),
    ("bcorr", baseline_correction.SNIPBCorr(n_iter=39)),
    ("savgol2", transformers.SavgolFilter(window_length=5, polyorder=2)),
    ("rounder2", transformers.Rounder()),
]
pipe = Pipeline(pipeline_steps, verbose=True)

# Only "signal" is routed through the pipeline; every other column is passed
# through untouched, and the original column names are kept on output.
ct_transformers = [("sr_pipe", pipe, ["signal"])]
ct = ColumnTransformer(
    ct_transformers,
    remainder="passthrough",
    verbose_feature_names_out=False,
)
ct.set_output(transform="polars")

# Fit and apply in one pass; the result is displayed as the cell output.
data_ = ct.fit_transform(input_data)
data_

[Pipeline] ........... (step 1 of 7) Processing rounder, total=   0.0s
[Pipeline] .... (step 2 of 7) Processing first_diff_sym, total=   0.0s
[Pipeline] ... (step 3 of 7) Processing second_diff_sub, total=   0.0s
[Pipeline] ........... (step 4 of 7) Processing savgol1, total=   0.0s


Performing baseline correction: 100%|██████████| 39/39 [00:00<00:00, 276.47it/s]

[Pipeline] ............. (step 5 of 7) Processing bcorr, total=   0.2s
[Pipeline] ........... (step 6 of 7) Processing savgol2, total=   0.0s
[Pipeline] .......... (step 7 of 7) Processing rounder2, total=   0.0s





signal,time
f64,f64
0.000088,0.005833
-0.000171,0.0125
-0.000015,0.019167
…,…
0.00005,26.9925
0.000017,26.999167


Signal mapping needs a report. It should include:

- number of peaks detected
- x, y, quantile of top 5 peaks
- number of windows assigned
- window with most peaks

Start with that.

Implement it as a class that accepts the `SignalMap` object as input.

The report will be a printout of tables.

In [5]:
# Signal mapping: the mapper is fed an integer row index "idx" and the
# preprocessed signal renamed to "X"; the "time" column is dropped.
mapper_input = (
    data_.with_row_index("idx")
    .drop("time")
    .cast({"idx": int})
    .rename({"signal": "X"})
)

signal_mapper = map_signal.SignalMapper(find_peaks_kwargs={"prominence": 0.001})
signal_mapper.fit_transform(X=mapper_input)

# NOTE(review): the recorded output of this cell ends with
# "AttributeError: 'WindowedPeakMap' object has no attribute 'X'" -- confirm
# which call raises it before relying on the plot below.
signal_mapper.signal_mapping_.viz_mode().draw_signal().plot()

shape: (2, 4)
┌──────┬───────┬────────────┬─────────────┐
│ left ┆ right ┆ w_idx_left ┆ w_idx_right │
│ ---  ┆ ---   ┆ ---        ┆ ---         │
│ i64  ┆ i64   ┆ i64        ┆ i64         │
╞══════╪═══════╪════════════╪═════════════╡
│ 265  ┆ 266   ┆ 0          ┆ 1           │
│ 560  ┆ 561   ┆ 1          ┆ 2           │
└──────┴───────┴────────────┴─────────────┘
left: the interpeak window start idx, right: interpeak window end idx, w_idx_left: the peak window idx on the left, w_idx_right: the peak window idx on the right.


Do with it what you will, but the result is neighbouring peak windows with no intermediate interpeak window



AttributeError: 'WindowedPeakMap' object has no attribute 'X'

In [None]:
# Pull the per-index window assignment off the fitted signal map and show it.
fitted_window_map = signal_mapper.signal_mapping_.window_map
time_window_mapping = fitted_window_map.time_window_mapping
time_window_mapping

w_type,w_idx,idx
str,i64,i64
"""interpeak""",0,0
"""interpeak""",0,1
"""interpeak""",0,2
…,…,…
"""interpeak""",2,4048
"""interpeak""",2,4049


In [None]:
# Display the fitted peak map via its own repr; the recorded output lists three
# tables: maxima, contour_line_bounds and widths.
signal_mapper.signal_mapping_.peak_map

contains the following tables:
	maxima:
		columns: ['p_idx', 'loc', 'dim', 'value']
		shape: (134, 4)
	contour_line_bounds
		columns: ['p_idx', 'loc', 'msnt', 'dim', 'value']
		shape: (804, 5)
	widths:
		columns: ['p_idx', 'msnt', 'value']
		shape: (134, 3)

In [None]:
# Display the peak widths table; the recorded output has columns p_idx, msnt
# and value.
signal_mapper.signal_mapping_.peak_map.widths

p_idx,msnt,value
i64,str,f64
0,"""width_whh""",7.052996
1,"""width_whh""",7.059117
2,"""width_whh""",9.249463
…,…,…
65,"""width_pb""",111.878249
66,"""width_pb""",187.862789


In [33]:
# NOTE(review): same expression as an earlier cell. The recorded output of this
# run (execution count 33, out of sequence with the cells above) is
# "AttributeError: 'WindowedPeakMap' object has no attribute 'tbl_repr_formatter'"
# -- presumably raised while rendering the repr; confirm against the current
# library code, then fix or delete this cell.
signal_mapper.signal_mapping_.peak_map

AttributeError: 'WindowedPeakMap' object has no attribute 'tbl_repr_formatter'

In [None]:
class SignalMapReport:
    def __init__(self, signal_map: map_signal.SignalMap):
        self.signal_map = signal_map

        # window mapping

        self.window_map = self.signal_map.window_map
        self.X_windowed = self.window_map.X_windowed
        self.window_bounds = self.window_map.window_bounds

        # peak mapping

        self.peak_map = self.signal_map.peak_map