In [None]:
%load_ext autoreload
%autoreload 2

import sys

import os

# Prepend the parent of the current working directory (the project root) to
# sys.path so that project packages such as `agilette` can be imported below.
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), os.pardir)))

from agilette import agilette_core as ag

# Data directory handed to Agilette (machine-specific absolute path).
data_root = "/Users/jonathan/0_jono_data"
lib = ag.Agilette(data_root).library

# Preview the first 100 rows of the library's data table.
lib.data_table().head(100)

In [None]:
# Keep the full library data table for the sample lookup in the next cell.
data_table_df = lib.data_table()

In [None]:
# Path of the first row whose sample name matches. The filtered rows keep their
# original index labels, so the first match is taken by position (.iloc[0]);
# a label lookup (.loc[0]) would raise KeyError unless the match happened to
# carry the label 0.
run = data_table_df.loc[
    data_table_df["sample_name"] == "2021-debortoli-cabernet-merlot_avantor", "path"
].iloc[0]

In [None]:
# Collect the library's runs into one object (presumably a dict); it is
# indexed by run name in the next cell.
all_runs = lib.combined_dict()

In [None]:
# Select the single run that the rest of this notebook works on.
koerner = all_runs["2023-02-22_KOERNER-NELLUCIO-02-21.D"]

In [None]:
# Extract the run's channel data; the result is inspected with .keys() and
# indexed by "260.0" in the following cells.
koerner_ch = koerner.extract_ch_data()

In [None]:
# Show which keys are available in the extracted channel data.
koerner_ch.keys()

In [None]:
# Pick the "260.0" entry; its data_df supplies the "mins" and "mAU" columns
# used throughout the cells below.
koerner_260 = koerner_ch["260.0"]

## Baseline Detection

In [None]:
import plotly.graph_objs as go

import pandas as pd
import peakutils

In [None]:
# Raw signal as a line trace; `trace` is reused by the later comparison figures.
trace = go.Scatter(
    x=koerner_260.data_df["mins"],
    y=koerner_260.data_df["mAU"],
    mode="lines",
)

# Figure that the following cells keep adding traces to.
fig = go.Figure(data=[trace])

fig.show()

In [None]:
# Estimate a baseline for the raw signal with peakutils, using its default
# settings (no optional arguments are passed).
baseline_values = peakutils.baseline(koerner_260.data_df["mAU"])
baseline_values

In [None]:
# Overlay the peakutils baseline on the raw-signal figure. The trace gets its
# own name so that re-running this cell cannot overwrite the `baseline_trace`
# helper function defined later in the notebook.
peakutils_baseline_trace = go.Scatter(
    x=koerner_260.data_df["mins"], y=baseline_values, name="Baseline"
)
fig.add_trace(peakutils_baseline_trace)

fig.show()

It's not a great approximation of the baseline, but it will do. Now get the peak heights.

In [None]:
from scipy.signal import find_peaks

# Detect local maxima with a height of at least 4 in the raw (not
# baseline-corrected) "mAU" signal.
peaks = find_peaks(koerner_260.data_df["mAU"], height=4)

# find_peaks returns (indices, properties); names are kept for later cells.
peaks_y_idx, peaks_y = peaks

# The indices are positions in the input, so select the matching "mins" values
# by position (.iloc) rather than by label (.loc); the two only coincide when
# the DataFrame has a default RangeIndex.
peaks_x = koerner_260.data_df["mins"].iloc[peaks_y_idx]

print(peaks_x.values)
print(peaks_y["peak_heights"])

peak_trace = go.Scatter(
    x=peaks_x, y=peaks_y["peak_heights"], name="peaks", mode="markers"
)

fig.add_trace(peak_trace)

fig.show()

So the problem with this approach is that the peak detection is based on an absolute value rather than being relative to a changing baseline. We need both a method that fits the actual baseline more closely AND a way of interfacing it with a peak detection algorithm.

## ASLS Fitted Baseline

In [None]:
from pybaselines import Baseline

# Build a pybaselines fitter on the run's x values (the "mins" column).
baseline_fitter = Baseline(x_data=koerner_260.data_df["mins"].values)

# Fit with asls and keep only the first element of the result, which is
# plotted below as the baseline.
baseline_y = baseline_fitter.asls(koerner_260.data_df["mAU"].values)[0]


def baseline_trace(baseline_y_, x_, name_):
    """Build a Plotly line trace for a fitted baseline.

    Args:
        baseline_y_: y values of the baseline.
        x_: x values the baseline is plotted against.
        name_: legend name given to the trace.

    Returns:
        A ``go.Scatter`` drawn with ``mode="lines"``.
    """
    return go.Scatter(x=x_, y=baseline_y_, mode="lines", name=name_)

In [None]:
# Bare attribute access: this only displays the bound method, it does not run a fit.
baseline_fitter.asls

In [None]:
# Raw signal with the asls baseline overlaid.
fig2 = go.Figure()

fig2.add_trace(trace)

# Read the fitter's x values through its public `x` attribute instead of
# reaching into the instance `__dict__`.
fig2.add_trace(
    baseline_trace(baseline_y_=baseline_y, x_=baseline_fitter.x, name_="asls")
)

fig2.show()

## IASLS Fitted Baseline

In [None]:
# Fit with iasls and keep only the first element of the result, which is
# plotted here as the baseline.
baseline_y_iasls = baseline_fitter.iasls(koerner_260.data_df["mAU"].values)[0]

fig3 = go.Figure()

# Read the fitter's x values through its public `x` attribute instead of
# reaching into the instance `__dict__`.
trace_iasls = baseline_trace(
    x_=baseline_fitter.x, baseline_y_=baseline_y_iasls, name_="iasls"
)

fig3.add_traces([trace, trace_iasls])

fig3.show()

## AIRPLS

In [None]:
# Fit with airpls and keep only the first element of the result, which is
# plotted here as the baseline.
baseline_y_airpls = baseline_fitter.airpls(koerner_260.data_df["mAU"].values)[0]

fig4 = go.Figure()

# Read the fitter's x values through its public `x` attribute instead of
# reaching into the instance `__dict__`.
trace_airpls = baseline_trace(
    x_=baseline_fitter.x, baseline_y_=baseline_y_airpls, name_="airpls"
)

fig4.add_traces([trace, trace_airpls])

fig4.show()

The above fittings look pretty good, with airpls looking slightly better from a visual inspection. The difference is..

In [None]:
# Sum of the signed point-by-point differences between the two fitted baselines.
# NOTE(review): positive and negative differences cancel in a signed sum, so a
# value near zero does not by itself show that the two baselines match.
sum(baseline_y_airpls - baseline_y_iasls)

Nothing, there you go, don't trust your eyes. In that case I'll stick with iasls as it is presumably older, but we can experiment with different ones over the entire data set at some point.

In [None]:
# Subtract the fitted airpls baseline from the raw signal.
# NOTE(review): the text above says iasls will be used, but this subtracts the
# airpls baseline -- confirm which one is intended.
koerner_260_minus_baseline = koerner_260.data_df["mAU"].values - baseline_y_airpls

baseline_subtract_y_airpls_trace = go.Scatter(
    x=koerner_260.data_df["mins"],
    y=koerner_260_minus_baseline,
    name="koerner_260",
)

# Fresh figure holding only the baseline-corrected trace; peaks are added later.
fig5 = go.Figure(data=[baseline_subtract_y_airpls_trace])
fig5.show()

An interesting observation that has arisen is that there appear to be no components eluting after 30 mins. For a 2.1% gradient that would mean that by 63% MeOH, everything has eluted, and presumably that bump at 41 mins is the refractive index change when it drops back down from 95% to 5%. Should check a few more samples and see what they look like, then check with Andrew what he thinks about reducing the run time and capping the total methanol, etc.

Now add some peak detection.

In [None]:
# Detect peaks with a height of at least 4 in the baseline-subtracted signal.
peaks_idx, peak_heights = find_peaks(koerner_260_minus_baseline, height=4)

peak_height_values = peak_heights["peak_heights"]

# find_peaks returns positions in its input array, so select the matching
# "mins" values by position (.iloc) rather than by label (.loc); the two only
# coincide when the DataFrame has a default RangeIndex.
peaks_x = koerner_260.data_df["mins"].iloc[peaks_idx]

peaks_trace = go.Scatter(
    x=peaks_x, y=peak_height_values, mode="markers", name="peaks > 4 mAU"
)

fig5.add_trace(peaks_trace)

fig5.show()

Setting a lower threshold of 4 mAU and using the default separation setting returns a pretty good looking peak trace. Although we lose a lot between 3 and 4, I will assume that there is already enough information within the detected peaks. Something that could be investigated at a later date.

In [None]:
# Number of peaks detected in the baseline-subtracted signal.
len(peaks_idx)

With the current settings, there are 36 peaks detected.

In [None]:
# Wrap the detected peak indices in a DataFrame and show its summary statistics.
peaks_idx_df = pd.DataFrame(peaks_idx)


peaks_idx_df.describe()