# Getting started with prtecan

In [None]:
%load_ext autoreload
%autoreload 2

import logging
import os
import warnings
from pathlib import Path

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb

from clophfit import prtecan
from clophfit.binding import fitting, plotting
from clophfit.prtecan import Titration

# Surface INFO-level messages emitted by the "clophfit.prtecan" logger.
specific_logger = logging.getLogger("clophfit.prtecan")
specific_logger.setLevel(logging.INFO)

# Root folder of the Tecan test data, relative to the notebook location.
# Path.resolve() already returns an absolute path, so a trailing .absolute()
# call is redundant.
data_tests = Path("..", "..", "tests", "Tecan").resolve()

In [None]:
# Work from the "L1" test-data folder so the relative file names used in the
# following cells resolve.
os.chdir(data_tests / "L1")
# Silence UserWarning messages attributed to the clophfit.prtecan module.
warnings.filterwarnings("ignore", category=UserWarning, module="clophfit.prtecan")

## Parsing a Single Tecan File

A Tecan file comprises multiple label blocks, each with its own metadata. This metadata provides critical details and context for the associated label block. In addition, the Tecan file itself has overarching metadata that describes its overall content.

When the KEYS for label blocks are identical, it indicates that these label blocks are equivalent - meaning, they contain the same measurements. The equality of KEYS plays a significant role in parsing and analyzing Tecan files, as it assists in identifying and grouping similar measurement sets together. This understanding of label block equivalence based on KEY similarity is critical when working with Tecan files.

In [None]:
tf = prtecan.Tecanfile("290513_8.8.xls")
lb0 = tf.labelblocks[0]
lb1 = tf.labelblocks[1]
tf.metadata

In [None]:
print("Metadata:\n", lb0.metadata, "\n")
print("Data:\n", lb0.data)

In [None]:
tf1 = prtecan.Tecanfile("290513_8.2.xls")

tf1.labelblocks[1].__almost_eq__(lb1), tf1.labelblocks[1] == lb1

## Titration inherits TecanfilesGroup

In [None]:
tfg = prtecan.TecanfilesGroup([tf, tf1])
lbg0 = tfg.labelblocksgroups[0]

print(lbg0.data["A01"])

lbg0.data_nrm["A01"]

In [None]:
tit = prtecan.Titration([tf, tf1], x=np.array([8.8, 8.2]), is_ph=True)
print(tit)
tit.labelblocksgroups[0].data_nrm["A01"]

In [None]:
tit.labelblocksgroups == tfg.labelblocksgroups

In [None]:
tit.additions = [100, 1]

In [None]:
tit.params.nrm = True
tit.params.dil = True
tit.params.bg = True
tit.params

In [None]:
tit.buffer.wells = ["B02"]

In [None]:
tit.buffer.dataframes

In [None]:
tit.bg, tit.bg_err

In [None]:
tit.labelblocksgroups[0].data_nrm["A01"]

## Group a list of tecan files into a titration

The command Titration.fromlistfile("../listfile") reads a list of Tecan files, identifies unique measurements in each file, groups matching ones, and combines them into a titration set for further analysis.

In [None]:
# Build the titration from a list file.
# NOTE(review): is_ph=True presumably selects pH (rather than Cl) mode - confirm.
tit = Titration.fromlistfile("./list.pH.csv", is_ph=True)
print(tit.x)
# Keep handles on the first two label-block groups for the cells below.
lbg0 = tit.labelblocksgroups[0]
lbg1 = tit.labelblocksgroups[1]
# "Temperature" metadata entry of the label block at index 6 in the second group.
print(lbg1.labelblocks[6].metadata["Temperature"])
lbg0.metadata, lbg1.metadata

Within each labelblocksgroup, `data_nrm` is calculated immediately.

In [None]:
(lbg0.data["H03"], lbg1.data, lbg0.data_nrm["H03"], lbg1.data_nrm["H03"])

Start with platescheme loading to set buffer wells (and consequently buffer values).

The labelblocks groups will then be populated with buffer-subtracted data, both with and without normalization.

In [None]:
# Load the plate scheme, then print the buffer wells, the control wells and
# the name-to-wells mapping stored on tit.scheme.
tit.load_scheme("./scheme.txt")
print(f"Buffer wells : {tit.scheme.buffer}")
print(f"Ctrl wells   : {tit.scheme.ctrl}")
print(f"CTR name:wells {tit.scheme.names}")

In [None]:
tit.scheme

In [None]:
(lbg0.data["H12"], lbg1.data_nrm["H12"])

In [None]:
tit.load_additions("./additions.pH")
tit.additions

In [None]:
(lbg0.data["H12"], tit.data[0]["H12"], lbg0.data_nrm["H12"], tit.bg[0])

The order in which you apply dilution correction and plate scheme can impact your intermediate results, even though the final results might be the same.

    Dilution correction adjusts the measured data to account for any dilutions made during sample preparation. This typically involves multiplying the measured values by the dilution factor to estimate the true concentration of the sample.

    A plate scheme describes the layout of the samples on a plate (common in laboratory experiments, such as those involving microtiter plates). The plate scheme may involve rearranging or grouping the data in some way based on the physical location of the samples on the plate.

### Reassign Buffer Wells

You can reassess buffer wells, updating the data to account for any dilution (additions) and subtracting the updated buffer value. This is a handy feature that gives you more control over your analysis.

For instance, consider the following data for a particular well:

In [None]:
print(tit.labelblocksgroups[1].data_nrm["D01"])
tit.data[1].get("D01")

In [None]:
# Turn off the `bg` and `dil` processing flags, then print well D02 of the
# second label block.
# NOTE(review): tit.data is presumably recomputed from the current params - confirm.
tit.params.bg = False
tit.params.dil = False
print(tit.data[1]["D02"])

You can reassign buffer wells using the `buffer.wells` attribute:

In [None]:
tit.params.bg = True
tit.buffer.wells = ["D01", "E01"]

In [None]:
tit.bg

This updates the data for the specified wells, correcting for dilution and subtracting the buffer value:

<div style="border: 2px solid red; padding: 10px; background-color: #ffdddd; margin: 20px 0;">
    <h2 style="color: red;">🚨 The data remain: 🚨</h2>
    <p> - <strong>unchanged</strong> in <code>labelblocksgroups[:].data</code></p>
    <p> - <strong>buffer subtracted</strong> in <code>labelblocksgroups[:].data_buffersubtracted</code></p>
    <p> - <strong>buffer subtracted and dilution corrected</strong> in <code>data</code></p>
</div>

## Fitting

test:

- E10
- F10
- G09

TODO:

- Remove outlier data points at the initial and final ends of the titration

In [None]:
# Start again from the "L1" data set: rebuild the titration, then load the
# alternative plate scheme ("scheme.0.txt") and the additions file.
os.chdir(data_tests / "L1")
tit = Titration.fromlistfile("./list.pH.csv", is_ph=True)
tit.load_scheme("./scheme.0.txt")
tit.load_additions("additions.pH")
tit

### Choose buffer value to be subtracted between mean values and ODR fitted values.

In [None]:
# For label block `lb`, plot the "fit" column (x axis) against the "mean"
# column (y axis) of the normalized buffer dataframe.
lb = 1
buffer_nrm = tit.buffer.dataframes_nrm[lb]

x, y = buffer_nrm["fit"], buffer_nrm["mean"]
# Error bars are drawn at one tenth of the tabulated "fit_err" / "sem" values.
x_err, y_err = buffer_nrm["fit_err"] / 10, buffer_nrm["sem"] / 10

errorbar_style = {
    "fmt": "o",
    "color": "blue",
    "ecolor": "lightgray",
    "elinewidth": 2,
    "capsize": 4,
}
plt.errorbar(x, y, xerr=x_err, yerr=y_err, **errorbar_style)
plt.xlabel("ODR Fit")
plt.ylabel("Buffer wells Mean")

In [None]:
tit.buffer.plot().fig

In [None]:
tit.buffer.plot().fig

In [None]:
tit.bg_err

In [None]:
# Enable the `bg_adj` flag and select the "fit" background method (a later
# cell uses "mean"), then show the resulting background and its error.
tit.params.bg_adj = True
tit.params.bg_mth = "fit"
tit.bg, tit.bg_err

In [None]:
specific_logger.setLevel(logging.ERROR)
# plt.show()
tit.results[2]["A02"].figure

In [None]:
k = "A12"

r = tit.results[2][k]
r.result.params

In [None]:
ro = fitting.fit_binding_odr(r)
ro.figure
# ro.result.params

In [None]:
tit._dil_corr

In [None]:
tit.params.nrm = False
tit.params

In [None]:
tit.buffer.plot(True).fig

In [None]:
tit.labelblocksgroups[0].data

In [None]:
# Switch to the "140220" data set through the absolute test-data root, as the
# other chdir cells do, so this cell does not depend on the current working
# directory or on the name of the parent folder.
os.chdir(data_tests / "140220")
tit = Titration.fromlistfile("./list.pH.csv", is_ph=True)
tit.load_scheme("./scheme.txt")
tit.load_additions("additions.pH")

In [None]:
tit.buffer.plot().fig

In [None]:
tit.data[1]["H03"]

In [None]:
tit.params.bg_adj = True
tit.params.bg_mth = "mean"

tit.params

In [None]:
# Load previously saved fit results, indexed by the first CSV column.
df1 = pd.read_csv("../140220/fit1-1.csv", index_col=0)
# Index-aligned inner merge; columns present on both sides get pandas'
# default suffixes: "_x" for tit.result_dfs[1], "_y" for the CSV.
merged_df = tit.result_dfs[1][["K", "sK"]].merge(df1, left_index=True, right_index=True)

# K from the CSV (x axis) against K from the current titration (y axis).
sb.jointplot(merged_df, x="K_y", y="K_x", ratio=3, space=0.4)

In [None]:
tit.results[2]["A01"].figure

In [None]:
tit.data[1]["A01"]

If a fit fails for a well, the well key will still be present in the results list of dicts.

In [None]:
# Data of well H02 in the first label block.
print(tit.data[0]["H02"])
# Keys present in results[1] but absent from results[0] (set difference).
print(tit.results[1].keys() - tit.results[0].keys())
tit.results[0]["H02"]

In [None]:
tit.params.nrm = False

In [None]:
tit.results[2]["H01"].figure

In the global fit (i.e. fitting both label blocks together), datasets with insufficient data points are removed.

In [None]:
# Re-enable normalization, then assemble by hand the two-label-block dataset
# for a single well and run the global fit on it.
tit.params.nrm = True
well = "H02"
y0 = np.array(tit.data[0][well])
y1 = np.array(tit.data[1][well])
x = np.array(tit.x)

# One DataArray per label block, both sharing the same x values.
data_arrays = {
    "y0": fitting.DataArray(x, y0),
    "y1": fitting.DataArray(x, y1),
}
ds = fitting.Dataset(data_arrays, is_ph=True)
rfit = fitting.fit_binding_glob(ds)

rfit.result.params

In [None]:
rfit.figure

In [None]:
fitting.fit_binding_odr(rfit).figure

In [None]:
tit.result_dfs[1].head()

You can decide how to pre-process the data through `tit.params`:
- [bg] subtract background
- [dil] apply correction for dilution (e.g. when titrant without protein is added during a titration)
- [nrm] normalize for gain, number of flashes and integration time.

In [None]:
tit.params.nrm = False
tit.params.bg = True
tit.params.bg_adj = False
tit.data[0]["E06"]

### Posterior analysis with emcee

To explore the posterior of parameters you can use the Minimizer object returned in FitResult.

In [None]:
# Seed NumPy's legacy global RNG before sampling.
# NOTE(review): presumably this is what makes the emcee run repeatable - confirm.
np.random.seed(0)  # noqa: NPY002
# Sample the posterior via the minimizer stored on the global-fit result.
# NOTE(review): argument meanings below assume rfit.mini is an lmfit Minimizer
# (burn = initial samples discarded, thin = keep one sample in every `thin`,
# workers = size of the parallel pool) - confirm against the lmfit docs.
remcee = rfit.mini.emcee(
    burn=50,
    steps=2000,
    workers=8,
    thin=10,
    nwalkers=30,
    progress=False,
    is_weighted=False,
)

In [None]:
f = plotting.plot_emcee(remcee.flatchain)
print(remcee.flatchain.quantile([0.03, 0.97])["K"].to_list())

In [None]:
samples = remcee.flatchain[["K"]]
# Turn the selected flatchain columns into a dict of arrays and wrap them in
# an ArviZ InferenceData object.
samples_dict = {key: np.array(val) for key, val in samples.items()}
idata = az.from_dict(posterior=samples_dict)
k_samples = idata.posterior["K"].to_numpy()
# The 3rd percentile leaves 97% of the posterior samples above it, so the
# message must say 97% (matching the 0.03 / 0.97 quantiles printed earlier).
percentile_value = np.percentile(k_samples, 3)
print(f"Value at which the probability of being higher is 97%: {percentile_value}")

az.plot_forest(k_samples)

### Cl titration analysis

In [None]:
# Enter the "140220" data set through the absolute test-data root, so this
# cell works regardless of the current working directory.
os.chdir(data_tests / "140220")
# Build the chloride titration (is_ph=False) and load its plate scheme.
cl_an = prtecan.Titration.fromlistfile("list.cl.csv", is_ph=False)
cl_an.load_scheme("scheme.txt")
cl_an.scheme

In [None]:
# Load the additions, show the x values as read from the list file, then
# replace them with the values computed from the additions.
cl_an.load_additions("additions.cl")
print(cl_an.x)
# NOTE(review): 1000 is presumably the titrant stock concentration - confirm
# against prtecan.calculate_conc.
cl_an.x = prtecan.calculate_conc(cl_an.additions, 1000)
cl_an.x

In [None]:
# `well` is not defined in this cell: it is the notebook-level name assigned
# in the earlier global-fit cell ("H02"), so that cell must have been run.
fres = cl_an.results[2][well]
# Validity flag plus the BIC and reduced chi-square of the fit.
print(fres.is_valid(), fres.result.bic, fres.result.redchi)
fres.figure

### Plotting

In [None]:
plotter = prtecan.TitrationPlotter(tit)

In [None]:
f = plotter.plot_k(1, title="2014-12-23", hue_column="S1_default")

### selection

In [None]:
tit.params.nrm = True
tit.params.dil = True
tit.params.bg_mth = "fit"
tit

In [None]:
# Rows whose "ctrl" value is missing are labelled "U" so that they form
# their own hue group in the grid below.
missing_ctrl = tit.result_dfs[1]["ctrl"].isna()
tit.result_dfs[1].loc[missing_ctrl, "ctrl"] = "U"

sb.set_style("whitegrid")
fit_columns = ["K", "S1_default", "S0_default"]
g = sb.PairGrid(
    tit.result_dfs[1],
    x_vars=fit_columns,
    y_vars=fit_columns,
    hue="ctrl",
    palette="Set1",
    diag_sharey=False,
)
# Scatter below the diagonal, filled KDE above it, 1-D KDE on the diagonal.
g.map_lower(plt.scatter)
g.map_upper(sb.kdeplot, fill=True)
g.map_diag(sb.kdeplot)
g.add_legend()

In [None]:
# Lower-triangle pair plot of the columns selected from result_dfs[2],
# coloured by the "S1_y0" value, with KDEs on the diagonal. The "darkgrid"
# style applies only inside this `with` block.
with sb.axes_style("darkgrid"):
    g = sb.pairplot(
        tit.result_dfs[2][["S1_y0", "S0_y0", "K", "S1_y1", "S0_y1"]],
        hue="S1_y0",
        palette="Reds",
        corner=True,
        diag_kind="kde",
    )

### combining

In [None]:
# Rows of result_dfs[1] selected by tit.keys_unk, sorted by index.
res_unk = tit.result_dfs[1].loc[tit.keys_unk].sort_index()
# Copy the index into a "well" column so it can be used as a plot variable.
res_unk["well"] = res_unk.index

In [None]:
# NOTE(review): `f` is not used again in this cell, and PairGrid appears to
# create its own figure (sized by height/aspect) - this plt.figure call may
# only produce an extra empty figure; confirm before removing.
f = plt.figure(figsize=(24, 14))
# One column of axes per fitted parameter, with the wells along the y axis.
g = sb.PairGrid(
    res_unk,
    x_vars=["K", "S1_default", "S0_default"],
    y_vars="well",
    height=12,
    aspect=0.4,
)
# Draw a horizontal dot plot on every axes using stripplot.
g.map(sb.stripplot, size=14, orient="h", palette="Set2", edgecolor="auto")

# Optional: use the same x axis limits on all columns and add better labels
# g.set(xlim=(0, 25), xlabel="Crashes", ylabel="")

# Column titles, in the same order as x_vars above.
titles = ["$pK_a$", "B$_{neutral}$", "B$_{anionic}$"]

# strict=False: a length mismatch between axes and titles is silently tolerated.
for ax, title in zip(g.axes.flat, titles, strict=False):
    # Set a different title for each axes
    ax.set(title=title)

    # Show only the horizontal grid lines (one per well).
    ax.xaxis.grid(False)
    ax.yaxis.grid(True)

# Remove the left and bottom spines.
sb.despine(left=True, bottom=True)