# Getting started with prtecan

In [None]:
import os
import warnings

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb

from clophfit import prtecan
from clophfit.binding import fitting, plotting
from clophfit.prtecan import Titration, TitrationAnalysis

%load_ext autoreload
%autoreload 2

# Work from the test-data directory so the relative paths used in later cells resolve.
os.chdir("../../tests/Tecan/140220/")
# Silence UserWarning messages attributed to the clophfit.prtecan module.
warnings.filterwarnings("ignore", category=UserWarning, module="clophfit.prtecan")

## Parsing a Single Tecan File

A Tecan file comprises multiple label blocks, each with its own metadata. This metadata provides critical details and context for the associated label block. In addition, the Tecan file itself has overarching metadata that describes its overall content.

When the KEYS for label blocks are identical, it indicates that these label blocks are equivalent - meaning, they contain the same measurements. The equality of KEYS plays a significant role in parsing and analyzing Tecan files, as it assists in identifying and grouping similar measurement sets together. This understanding of label block equivalence based on KEY similarity is critical when working with Tecan files.

In [None]:
# Parse a single Tecan file (path is relative to the working directory set earlier).
tf = prtecan.Tecanfile("../290212_6.38.xls")
# Keep the first label block; the next cell prints its metadata and data.
lb0 = tf.labelblocks[0]
# Last expression of the cell: displays the file-level metadata.
tf.metadata

In [None]:
# Show the metadata and the data held by the first label block.
print("Metadata:\n", lb0.metadata, "\n")
print("Data:\n", lb0.data)

## Group a list of tecan files into a titration

The command Titration.fromlistfile("../listfile") reads a list of Tecan files, identifies unique measurements in each file, groups matching ones, and combines them into a titration set for further analysis.

In [None]:
# Build a titration from the list file, flagged as a pH titration via is_ph=True.
tit = Titration.fromlistfile("./list.pH", is_ph=True)
print(tit.conc, "\n")
# Keep handles on the first two label-block groups; later cells reuse them.
lbg0 = tit.labelblocksgroups[0]
lbg1 = tit.labelblocksgroups[1]
# Displayed as a tuple: the metadata of each group.
lbg0.metadata, lbg1.metadata

In [None]:
# "Temperature" metadata entry of the label block at index 5 in the first group.
lbg0.labelblocks[5].metadata["Temperature"]

In [None]:
# Snapshot taken before any plate scheme is loaded on `tit`:
# raw data of well H12 in both groups, plus each group's buffer-subtracted data.
(
    lbg0.data["H12"],
    lbg1.data["H12"],
    lbg0.data_buffersubtracted,
    lbg1.data_buffersubtracted,
)

Start by loading the plate scheme, which sets the buffer wells (and consequently the buffer values).

Each labelblocks group will then be populated with buffer-subtracted data, with or without normalization.

In [None]:
# Load the plate scheme, then report the well assignments it defines.
tit.load_scheme("./scheme.txt")
# One print call with a newline separator emits the same three lines.
print(
    f"Buffer wells : {tit.scheme.buffer}",
    f"Ctrl wells   : {tit.scheme.ctrl}",
    f"CTR name:wells {tit.scheme.names}",
    sep="\n",
)

In [None]:
# Same snapshot after the plate scheme has been loaded: raw and
# buffer-subtracted values of well H12 in both groups, plus the titration data.
(
    lbg0.data["H12"],
    lbg1.data["H12"],
    lbg0.data_buffersubtracted["H12"],
    lbg1.data_buffersubtracted["H12"],
    tit.data,
)

In [None]:
# Load the additions file, then display the additions stored on the titration.
tit.load_additions("./additions.pH")
tit.additions

In [None]:
# Snapshot after both the scheme and the additions are loaded: raw,
# buffer-subtracted and titration-level values of well H12 for both groups.
(
    lbg0.data["H12"],
    lbg1.data["H12"],
    lbg0.data_buffersubtracted["H12"],
    lbg1.data_buffersubtracted["H12"],
    tit.data[0]["H12"],
    tit.data[1]["H12"],
)

The order in which you apply dilution correction and plate scheme can impact your intermediate results, even though the final results might be the same.

    Dilution correction adjusts the measured data to account for any dilutions made during sample preparation. This typically involves multiplying the measured values by the dilution factor to estimate the true concentration of the sample.

    A plate scheme describes the layout of the samples on a plate (common in laboratory experiments, such as those involving microtiter plates). The plate scheme may involve rearranging or grouping the data in some way based on the physical location of the samples on the plate.

In [None]:
# Start again from a fresh titration, this time loading the additions
# BEFORE the plate scheme, to compare the intermediate results.
tit = Titration.fromlistfile("./list.pH", is_ph=True)
lbg0 = tit.labelblocksgroups[0]
lbg1 = tit.labelblocksgroups[1]
tit.load_additions("./additions.pH")
# Snapshot with additions loaded but no scheme yet.
(
    lbg0.data["H12"],
    lbg1.data["H12"],
    lbg0.data_buffersubtracted,
    lbg1.data_buffersubtracted,
    tit.data,
)

In [None]:
# Now load the plate scheme on top of the already-loaded additions.
tit.load_scheme("./scheme.txt")
# Same H12 snapshot as in the scheme-first sequence, for comparison.
(
    lbg0.data["H12"],
    lbg1.data["H12"],
    lbg0.data_buffersubtracted["H12"],
    lbg1.data_buffersubtracted["H12"],
    tit.data[0]["H12"],
    tit.data[1]["H12"],
)

### Reassign Buffer Wells

You can reassign buffer wells; the data is then updated to account for any dilution (additions) and the updated buffer value is subtracted. This is a handy feature that gives you more control over your analysis.

For instance, consider the following data for a particular well:

In [None]:
# Well D01 of the second group as seen in the raw data, the buffer-subtracted
# data, and the titration-level data, printed in that order.
for well_view in (
    tit.labelblocksgroups[1].data,
    tit.labelblocksgroups[1].data_buffersubtracted,
    tit.data[1],
):
    print(well_view["D01"])

You can reassign buffer wells using the `buffer_wells` attribute:

In [None]:
# Declare D01 and E01 as the buffer wells; the next cell prints the effect on D01.
tit.buffer_wells = ["D01", "E01"]

This updates the data for the specified wells, correcting for dilution and subtracting the buffer value:

In [None]:
# Same three views of well D01 as before the reassignment, printed in the
# same order: raw, buffer-subtracted, titration-level.
for well_view in (
    tit.labelblocksgroups[1].data,
    tit.labelblocksgroups[1].data_buffersubtracted,
    tit.data[1],
):
    print(well_view["D01"])

After reassignment, the data is:
- unchanged in `labelblocksgroups[].data`
- buffer-subtracted with the updated buffer value in `labelblocksgroups[].data_buffersubtracted`
- buffer-subtracted with the updated buffer value and dilution-corrected in `data`.

## Titration Analysis

In [None]:
# Build the analysis object from the same list file, then load the plate
# scheme and the additions.
titan = TitrationAnalysis.fromlistfile("./list.pH", is_ph=True)
titan.load_scheme("./scheme.txt")
titan.load_additions("additions.pH")
# g = titan.plot_buffer()
# Enable every pre-processing step: background subtraction (bg),
# normalization (nrm) and dilution correction (dil).
titan.datafit_params = {"bg": True, "nrm": True, "dil": True}

In [None]:
# Reference fit results stored as CSV, indexed by the file's first column.
df1 = pd.read_csv("fit1.csv", index_col=0)

# Index-aligned merge of the current K/sK columns with the reference table.
# Column names present on both sides receive the default "_x" (current) and
# "_y" (reference) suffixes.
current_fit = titan.result_dfs[1][["K", "sK"]]
merged_df = pd.merge(current_fit, df1, left_index=True, right_index=True)

# Regression joint plot: reference K on x against current K on y.
sb.jointplot(data=merged_df, x="K_y", y="K_x", kind="reg", ratio=3)

If a fit fails in a well, the well key will still be present in the results list of dicts.

In [None]:
# Data of well H02 in the data set at index 0.
print(titan.data[0]["H02"])
# Well keys present in results[1] but missing from results[0] (set difference).
print(titan.results[1].keys() - titan.results[0].keys())
# Displayed: whatever is stored for H02 in results[0].
titan.results[0]["H02"]

In [None]:
# Replace the fit keyword options (fin=-1, weight=False).
# NOTE(review): the exact meaning of `fin` and `weight` is defined by
# TitrationAnalysis.FitKwargs, which is not visible here -- confirm.
titan.fitkws = TitrationAnalysis.FitKwargs(fin=-1, weight=False)
# Displayed: the figure attached to the H02 entry of results[2].
titan.results[2]["H02"].figure

In the global fit (i.e. fitting 2 label blocks), datasets with insufficient data points are removed.

In [None]:
# Fit both label blocks of a single well together (global fit).
# `well` is also read by a later cell, so it stays a notebook-level name.
well = "H02"
# y0 / y1: titration data of the well for the data sets at index 0 and 1.
# (A previous assignment of y1 from data_buffersubtracted was immediately
# overwritten by the line below and has been removed as a dead store.)
y0 = np.array(titan.data[0][well])
y1 = np.array(titan.data[1][well])
x = np.array(titan.conc)
# Bundle x with the two named y series as a pH dataset and fit them jointly.
ds = fitting.Dataset(x, {"y0": y0, "y1": y1}, is_ph=True)
rfit = fitting.fit_binding_glob(ds)

# Displayed: the fitted parameters.
rfit.result.params

In [None]:
# First rows of the results table at index 1.
titan.result_dfs[1].head()

You can decide how to pre-process data with datafit_params:
- [bg] subtract background
- [dil] apply correction for dilution (when e.g. during a titration you add titrant without protein)
- [nrm] normalize for gain, number of flashes and integration time. 

In [None]:
# Background subtraction on; normalization and dilution correction off.
# (1/0 are used here where an earlier cell passed True/False.)
titan.datafit_params = {"bg": 1, "nrm": 0, "dil": 0}
# Displayed: the fit data of well E06 at index 1 under these settings.
titan.fitdata[1]["E06"]

### Posterior analysis with emcee

To explore the posterior of parameters you can use the Minimizer object returned in FitResult.

In [None]:
# Seed NumPy's global random state before sampling
# (presumably to make the chains reproducible -- confirm the sampler draws from it).
np.random.seed(0)
# Sample the posterior through the Minimizer kept on the fit result `rfit`.
remcee = rfit.mini.emcee(
    burn=50, steps=2000, workers=8, thin=10, nwalkers=30, progress=False
)

In [None]:
# Plot the flattened chain returned by the sampler.
f = plotting.plot_emcee(remcee.flatchain)
# 3% and 97% quantiles of K across the flattened chain.
print(remcee.flatchain.quantile([0.03, 0.97])["K"].to_list())

In [None]:
# Keep only the K column of the flattened chain.
samples = remcee.flatchain[["K"]]
# Convert the flatchain columns to plain arrays and wrap them in an ArviZ
# InferenceData object.
samples_dict = {key: np.array(val) for key, val in samples.items()}
idata = az.from_dict(posterior=samples_dict)
k_samples = idata.posterior["K"].to_numpy()
# 3rd percentile: 97% of the posterior samples of K lie above this value,
# matching the 0.03 lower quantile used for the flatchain summary.
percentile_value = np.percentile(k_samples, 3)
print(f"Value at which the probability of being higher is 97%: {percentile_value}")

az.plot_forest(k_samples)

### Cl titration analysis

In [None]:
# Chloride titration: same workflow with is_ph=False and the Cl list file.
cl_an = prtecan.TitrationAnalysis.fromlistfile("list.cl", is_ph=False)
cl_an.load_scheme("scheme.txt")
# Displayed: the loaded plate scheme.
cl_an.scheme

In [None]:
cl_an.load_additions("additions.cl")
# conc as it stands right after loading the additions.
print(cl_an.conc)
# Overwrite conc with values computed from the additions.
# NOTE(review): the meaning/units of the 1000 argument are defined by
# prtecan.calculate_conc, which is not visible here -- confirm.
cl_an.conc = prtecan.calculate_conc(cl_an.additions, 1000)
# Displayed: the recomputed conc.
cl_an.conc

In [None]:
# `well` is the notebook-level name set in an earlier cell ("H02").
fres = cl_an.results[2][well]
# Validity flag plus the BIC and reduced chi-square of the fit.
print(fres.is_valid(), fres.result.bic, fres.result.redchi)
# Displayed: the figure attached to this fit result.
fres.figure

### Plotting

In [None]:
# Plot K for the results at index 1, coloured by the S1_default column.
f = titan.plot_k(1, title="2014-12-23", hue_column="S1_default")

In [None]:
# Print the fitting summary for the results at index 2.
titan.print_fitting(2)

### selection

In [None]:
# Displayed: the notebook representation of the analysis object.
titan

In [None]:
# NOTE(review): earlier cells set `datafit_params` (with "bg", "nrm" and "dil"
# keys); this cell assigns `fitdata_params` with no "bg" key. If the attribute
# name is a typo this assignment may have no effect on the analysis -- confirm
# against TitrationAnalysis.
titan.fitdata_params = {"dil": 1, "nrm": 1}
# Displayed: the analysis object after the assignment.
titan

In [None]:
# Error-bar plot for the results at index 0: S1_default with sS1_default as its error.
f = titan.plot_ebar(0, y="S1_default", yerr="sS1_default")

In [None]:
# Error-bar plot for the results at index 2: S1_y1 with sS1_y1 as its error;
# xmin and ymin are passed through to plot_ebar.
f = titan.plot_ebar(2, y="S1_y1", yerr="sS1_y1", xmin=7.7, ymin=25)

In [None]:
# Rows with a missing "ctrl" value are labelled "U" so they form their own
# hue level in the grid below. The assignment is made in place on result_dfs[1].
titan.result_dfs[1].loc[titan.result_dfs[1]["ctrl"].isna(), "ctrl"] = "U"

sb.set_style("whitegrid")
# 3x3 grid over K, S1_default and S0_default, coloured by "ctrl".
g = sb.PairGrid(
    titan.result_dfs[1],
    x_vars=["K", "S1_default", "S0_default"],
    y_vars=["K", "S1_default", "S0_default"],
    hue="ctrl",
    palette="Set1",
    diag_sharey=False,
)
# Scatter below the diagonal, filled KDE above it, plain KDE on the diagonal.
g.map_lower(plt.scatter)
g.map_upper(sb.kdeplot, fill=True)
g.map_diag(sb.kdeplot)

In [None]:
# Corner pair plot of the fitted parameters in the results table at index 2,
# coloured by S1_y0, drawn under a temporary "darkgrid" axes style.
shown_columns = ["S1_y0", "S0_y0", "K", "S1_y1", "S0_y1"]
with sb.axes_style("darkgrid"):
    g = sb.pairplot(
        data=titan.result_dfs[2][shown_columns],
        hue="S1_y0",
        palette="Reds",
        diag_kind="kde",
        corner=True,
    )

### combining

In [None]:
# Rows of the results table at index 1 selected by titan.keys_unk, sorted by index.
res_unk = titan.result_dfs[1].loc[titan.keys_unk].sort_index()
# Expose the index as a regular "well" column so it can be used as a plot variable.
res_unk["well"] = res_unk.index

In [None]:
# NOTE(review): sb.PairGrid opens its own figure, so this explicitly sized
# figure is presumably left empty -- confirm before relying on `f`.
f = plt.figure(figsize=(24, 14))

# One row ("well") against the three fitted-parameter columns of res_unk.
parameter_columns = ["K", "S1_default", "S0_default"]
g = sb.PairGrid(
    res_unk, x_vars=parameter_columns, y_vars="well", height=12, aspect=0.4
)
# Horizontal strip plot in every panel gives a dot-plot look.
g.map(sb.stripplot, size=14, orient="h", palette="Set2", edgecolor="auto")

# Column headings, listed in the same order as the x variables.
titles = ["$pK_a$", "B$_{neutral}$", "B$_{anionic}$"]
for panel, heading in zip(g.axes.flat, titles, strict=False):
    panel.set_title(heading)
    # Horizontal grid lines only: one guide line per well.
    panel.xaxis.grid(False)
    panel.yaxis.grid(True)

# Drop the left and bottom spines as well as the default top/right ones.
sb.despine(left=True, bottom=True)