# PRTECAN Module Deep Dive Tutorial

This tutorial explores the `prtecan` module for processing Tecan plate reader data with a focus on:
- File parsing and data structures
- Titration curve analysis
- Advanced fitting capabilities
- Visualization techniques

In [None]:
# Magic commands for development
%load_ext autoreload
%autoreload 2
    
# Setup
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

from clophfit import prtecan

# Configure notebook
%matplotlib inline
# Apply matplotlib's "seaborn-v0_8" style sheet to every figure drawn below.
plt.style.use("seaborn-v0_8")
# Relative path: resolved against the directory the notebook is run from.
data_dir = Path("../../tests/Tecan/140220")

## 1. Core Data Structures

In [None]:
# 1.1 Labelblock - Fundamental data unit
# Read one Tecan export, look up the line indices returned by
# lookup_listoflines, and build a Labelblock from the lines between the first
# two indices.
file_path = data_dir / "pH6.5_200214.xls"
csvl = prtecan.read_xls(file_path)
idxs = prtecan.lookup_listoflines(csvl)
block_lines = csvl[idxs[0] : idxs[1]]
lb = prtecan.Labelblock(block_lines, str(file_path))

print("--- Labelblock Metadata ---")
for k, v in lb.metadata.items():
    print(f"{k}: {v.value}")

# Only the first 12 entries of the data mapping are shown.
print("\nSample Data (A01-H12):")
print(dict(list(lb.data.items())[:12]))

## 2. File Processing Pipeline

In [None]:
# 2.1 Complete processing workflow
# Create the Titration from the list file, then attach the plate scheme and
# the additions file, all read from data_dir.
tit = prtecan.Titration.fromlistfile(data_dir / "list.pH.csv", is_ph=True)
tit.load_scheme(data_dir / "scheme.txt")
tit.load_additions(data_dir / "additions.pH")

# Summarize what was loaded: number of files, number of label groups, the
# buffer wells from the scheme, and the span of the x values (reported as pH).
print(
    f"Titration with {len(tit.tecanfiles)} files and {len(tit.labelblocksgroups)} label groups"
)
print(f"Buffer wells: {tit.scheme.buffer}")
print(f"pH range: {tit.x.min():.1f} to {tit.x.max():.1f}")

## 3. Data Analysis Techniques

In [None]:
# 3.1 Accessing processed data
well = "D10"
# Collect the x values and two versions of the label-1 signal for this well.
# Note that "Signal (raw)" is read from the label group's data_nrm, while
# "Signal (processed)" comes from tit.data.
data = {
    "pH": tit.x,
    "Signal (raw)": tit.labelblocksgroups[1].data_nrm[well],
    "Signal (processed)": tit.data[1][well],
}

# Draw both series on a single set of axes.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(data["pH"], data["Signal (raw)"], "o-", label="Raw")
ax.plot(data["pH"], data["Signal (processed)"], "s-", label="Processed")
ax.set_xlabel("pH")
ax.set_ylabel("Fluorescence")
ax.set_title(f"Data Processing Pipeline for Well {well}")
ax.legend()
ax.grid(True)

## 4. Fitting Framework

In [None]:
# 4.1 Comparing fitting methods
# Switch on the bg, dil and nrm processing flags before reading the fit
# results (presumably background subtraction, dilution correction and
# normalization — confirm against the params documentation).
tit.params.bg = True
tit.params.dil = True
tit.params.nrm = True

# Fit results for the selected well from the three fitting routes.
result_single = tit.results[1][well]
result_global = tit.result_global[well]
result_odr = tit.result_odr[well]


def _format_k(label, fit_result):
    """Return ``'<label> Kd: <value> ± <stderr>'`` for the fitted ``K`` parameter.

    ``stderr`` is rendered as ``n/a`` when it is None, because formatting None
    with ``:.2f`` raises TypeError.
    NOTE(review): assumes the params are lmfit Parameters, where stderr stays
    None if uncertainties could not be estimated — confirm.
    """
    k_par = fit_result.result.params["K"]
    err = "n/a" if k_par.stderr is None else f"{k_par.stderr:.2f}"
    return f"{label} Kd: {k_par.value:.2f} ± {err}"


for label, fit_result in (
    ("Single fit", result_single),
    ("Global fit", result_global),
    ("ODR fit", result_odr),
):
    print(_format_k(label, fit_result))

# Show both fit figures. Only the last bare expression of a cell is rendered,
# so each figure is displayed explicitly. `display` is the built-in provided
# by the IPython kernel this notebook already requires (see the % magics).
result_single.figure.tight_layout()
result_global.figure.tight_layout()
display(result_single.figure)
display(result_global.figure)

In [None]:
# Echo the data directory currently in use.
data_dir

In [None]:
# Data directory for the remaining cells. The author's local dataset is used
# when it exists on this machine; otherwise fall back to the test data used at
# the top of the tutorial so the notebook stays runnable elsewhere.
local_dir = Path("/home/dati/arslanbaeva/data/raw/L2/")
data_dir = local_dir if local_dir.is_dir() else Path("../../tests/Tecan/140220")

# Rebuild the titration from the chosen directory and select the "meansd"
# background method before inspecting the background error.
tit = prtecan.Titration.fromlistfile(data_dir / "list.pH.csv", is_ph=True)
tit.load_additions(data_dir / "additions.pH")
tit.load_scheme(data_dir / "scheme.txt")
tit.params.bg_mth = "meansd"
tit.bg_err

In [None]:
# Evaluate the global-fit results up front (presumably for every well —
# confirm against compute_all).
tit.result_global.compute_all()

In [None]:
# Plot produced by the global results' plot_k(); presumably the fitted K
# values across wells — confirm.
tit.result_global.plot_k()

In [None]:
def output_fr(fr):
    """Print a fit result's reduced chi-square and dataset, then return its figure.

    Parameters
    ----------
    fr
        Object exposing ``result.redchi``, ``dataset`` and ``figure``.

    Returns
    -------
    The ``figure`` attribute of ``fr``, unchanged.
    """
    # One print call, two lines of output: redchi first, then the dataset.
    print(fr.result.redchi, fr.dataset, sep="\n")
    return fr.figure


In [None]:
from clophfit.binding.fitting import fit_binding_glob, fit_binding_glob_recursive, fit_binding_glob_recursive_outlier, fit_binding_glob_reweighted, outlier2
from clophfit.binding.fitting import DataArray, Dataset, weight_da, weight_multi_ds_titration, outlier_glob, fit_binding_pymc, fit_binding_pymc2, fit_binding_pymc_compare
import arviz as az

In [None]:
k = "A01"

# Build the dataset for well ``k``; it is indexed by the labels "y1" and "y2".
# NOTE(review): _create_global_ds is a private Titration method.
ds = tit._create_global_ds(k)
# Print the mean y-error of each label. As a bare expression in the middle of
# the cell the tuple was computed and silently discarded.
print(ds["y1"].y_err.mean(), ds["y2"].y_err.mean())
ds

In [None]:
# Fit the dataset built above with fit_binding_glob; the resulting figure is
# the cell output.
fit_binding_glob(ds).figure

In [None]:
# Inspect the y_errc attribute of the "y1" data array.
# NOTE(review): other cells read y_err; confirm y_errc exists and is not a typo.
ds["y1"].y_errc

In [None]:
# Fit via outlier2 with threshold=3.0 (presumably a z-score cut-off — confirm);
# plot_z_scores=1 is passed as a truthy flag.
fr = outlier2(ds, k, plot_z_scores=1, threshold=3.0)
fr.figure

In [None]:
# NOTE(review): this overwrites the result's `logger` attribute with the
# integer 1; the purpose is unclear and it looks like a debugging leftover —
# confirm before keeping.
fr.result.logger = 1

In [None]:
# Print the reduced chi-square and the chi-square of the current fit result,
# then show the dataset attached to it as the cell output.
print(fr.result.redchi)
print(fr.result.chisqr)
fr.dataset

In [None]:
# Dataset for well k built by _create_ds (the second argument 2 presumably
# selects label 2 — confirm).
# NOTE(review): _create_ds is a private Titration method.
ds2 = tit._create_ds(k, 2)
ds2

In [None]:
# Fit the ds2 dataset with fit_binding_glob and show the resulting figure.
fit_binding_glob(ds2).figure

In [None]:
# Rebind fr to the titration's own global-fit result for well k; the cells
# below that read fr use this object.
k = "A01"
fr = tit.result_global[k]
fr.figure

In [None]:
# Mean of bg_err for entries 1 and 2, keyed as "y1" and "y2".
{"y1": tit.bg_err[1].mean(), "y2": tit.bg_err[2].mean()}


In [None]:
# Value later passed as n_sd: 0.15 divided by the standard error of the fitted K.
# NOTE(review): this raises TypeError if stderr is None — confirm it is always
# set for this fit.
n_sd = 0.15 / fr.result.params["K"].stderr
print(n_sd)

# Run the model with a single noise scaling factor
# NOTE(review): disabled, so trace_single is never assigned by this cell.
#trace_single = fit_binding_pymc_compare(fr, n_sd=max(n_sd,1), n_xerr=.682, learn_separate_y_mag=False)

# Run the model with separate noise scaling factors for each label
#trace_separate_shot = fit_binding_pymc_compare(fr, n_sd=max(n_sd,1), n_xerr=.682, learn_separate_y_mag=True)
# Active call: the per-label mean bg_err dict is passed as the second
# positional argument, and n_sd is floored at 1 via max(n_sd, 1).
trace_separate_shot = fit_binding_pymc_compare(fr, {"y1": tit.bg_err[1].mean(), "y2": tit.bg_err[2].mean()}, n_sd=max(n_sd,1), n_xerr=.682, learn_separate_y_mag=True)


In [None]:
# ArviZ summary table for the trace returned by fit_binding_pymc_compare.
az.summary(trace_separate_shot)

In [None]:
# NOTE(review): trace_single is only assigned in a commented-out line of an
# earlier cell, so this raises NameError unless that line is re-enabled.
az.summary(trace_single)

In [None]:
# NOTE(review): trace_separate is not assigned anywhere in the visible cells
# (only trace_separate_shot is), so this raises NameError as written.
az.summary(trace_separate)

In [None]:
# You can pass the traces directly to az.compare
# NOTE(review): the "single_y_mag" key is bound to trace_separate_shot, and
# trace_separate is not assigned anywhere in the visible cells — fix both
# before relying on this table.
comparison_results = az.compare({"single_y_mag": trace_separate_shot, "separate_y_mag": trace_separate})

# The result is a pandas DataFrame ordered from best to worst model.
# With ArviZ's default log scale the best model has the HIGHEST ELPD
# (elpd_loo / elpd_waic); only on the deviance scale is lower better.
# The difference from the best model is in the 'elpd_diff' column
# (named 'd_loo' in older ArviZ releases).
comparison_results

In [None]:
# Recompute n_sd from the current fr (0.15 divided by the standard error of K)
# and run fit_binding_pymc with n_sd floored at 1.
# NOTE(review): raises TypeError if stderr is None — confirm it is always set.
n_sd = 0.15 / fr.result.params["K"].stderr
print(n_sd)
fr_mcmc2 = fit_binding_pymc(fr, n_sd=max(n_sd,1), n_xerr=.682)

In [None]:
# Re-import keeps this cell runnable on its own.
import arviz as az
# Keep the ArviZ summary of the object stored in fr_mcmc2.mini for the next
# cell, and show the fit figure as this cell's output.
df = az.summary(fr_mcmc2.mini)
fr_mcmc2.figure

In [None]:
# Show the summary table stored in df by the previous cell.
df

In [None]:
import arviz as az
# Summarize the object stored on fr_mcmc2, the fit_binding_pymc result
# assigned earlier in the notebook. The name fr_mcmc used here previously is
# never assigned in the visible cells and raised NameError.
az.summary(fr_mcmc2.mini)

In [None]:
# Set the mask to the logical negation of outlier_glob's output computed on
# the fit residuals with threshold=2.
# NOTE(review): da1 is not assigned anywhere in the visible cells, so this
# raises NameError as written — presumably it was meant to be one of the
# dataset's data arrays; confirm.
da1.mask = ~outlier_glob(fr.result.residual, plot_z_scores=1, threshold=2)

In [None]:
# Run fit_binding_glob_reweighted on ds with threshold=2.25 and report it via
# output_fr (prints redchi and the dataset, returns the figure).
output_fr(fit_binding_glob_reweighted(ds, k, threshold=2.25))

## 5. Advanced Features

In [None]:
# 5.1 Bayesian fitting with PyMC
# Select the "single" MCMC mode, then read the MCMC result for the well
# chosen earlier.
tit.params.mcmc = "single"
result_mcmc = tit.result_mcmc[well]

print("MCMC Results:")
print(f"Kd: {result_mcmc.result.params['K'].value:.2f}")
# NOTE(review): the interval printed below is the K parameter's min/max
# attributes — confirm the MCMC fit stores the 95% HDI there.
print(
    f"95% HDI: [{result_mcmc.result.params['K'].min:.2f}, {result_mcmc.result.params['K'].max:.2f}]"
)

# Plot trace
import arviz as az

# The trailing semicolon suppresses the notebook's text output for this call.
az.plot_trace(result_mcmc.mini, var_names=["K", "x_true"]);

## 6. Quality Control

In [None]:
# 6.1 Buffer analysis
# The returned plot objects are stored but not used again in this cell;
# plt.show() renders the open figures.
buffer_plot = tit.buffer.plot(nrm=True, title="Buffer Consistency Check")
plt.show()

# 6.2 Temperature monitoring
temp_plot = tit.plot_temperature()
plt.show()

## 7. Batch Processing

In [None]:
# 7.1 Export all results
from tempfile import mkdtemp

# Fresh temporary directory that receives the exported files.
output_dir = Path(mkdtemp())

# Export options, spelled out one per line before building the config.
export_options = {
    "out_fp": output_dir,
    "comb": True,
    "lim": None,
    "title": "FullAnalysis",
    "fit": True,
    "png": True,
}
config = prtecan.TecanConfig(**export_options)
tit.export_data_fit(config)

# Report the destination and list the names of everything written there.
print(f"Exported to: {output_dir}")
exported_names = [f.name for f in output_dir.glob("*")]
print("Contents:", *exported_names, sep="\n- ")

## Advanced Titration Construction

### Handling Files with Non-Identical Metadata

In [None]:
# Load files with different metadata (the two files differ in gain settings)
files = [data_dir / name for name in ("pH6.5_200214.xls", "pH7.08_200214.xls")]

# Create titration with pH values
tecan_files = [prtecan.Tecanfile(f) for f in files]
ph_values = np.array([6.5, 7.08])
mixed_tit = prtecan.Titration(tecan_files, x=ph_values, is_ph=True)

# Show metadata differences: gain value and unit of label block 1 per file
print("Metadata Comparison:")
for i, tf in enumerate(mixed_tit.tecanfiles):
    gain = tf.labelblocks[1].metadata["Gain"]
    print(f"File {i + 1} - Gain:", gain.value, gain.unit)

# Verify normalization worked: print the normalized value of one well per file
well = "D10"
print(f"\nNormalized values for {well}:")
nrm_values = mixed_tit.labelblocksgroups[1].data_nrm[well]
print("File 1:", nrm_values[0])
print("File 2:", nrm_values[1])

## Background Calculation Methods Comparison

In [None]:
# NOTE(review): this rebinds mixed_tit to the full titration `tit`, so the
# two-file titration built in the previous section is no longer used below.
mixed_tit = tit

In [None]:
# Configure experiment
mixed_tit.load_scheme(data_dir / "scheme.txt")
mixed_tit.load_additions(data_dir / "additions.pH")

# Test different background methods
methods = ["mean", "meansd", "fit"]
results = {}

# One panel per method, side by side.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for ax, method in zip(axes, methods):
    # Select the method, then read the background and processed signal
    mixed_tit.params.bg_mth = method
    background = mixed_tit.bg[1][0]
    processed = mixed_tit.data[1][well]
    results[method] = {"bg": background, "processed": processed}

    # Processed signal against x, with a dashed zero line for reference
    ax.plot(mixed_tit.x, processed, "o-", label=method)
    ax.axhline(y=0, color="gray", linestyle="--")
    ax.set_title(f"Method: {method}")
    ax.set_xlabel("pH")
    ax.set_ylabel("Signal")
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()

# Show numerical comparison
print("Background Values:")
for method, vals in results.items():
    print(f"{method}: {vals['bg']:.1f}")

print("\nFirst Point Values:")
for method, vals in results.items():
    print(f"{method}: {vals['processed'][0]:.1f}")

### Buffer Analysis Comparison

In [None]:
# Visualize buffer fits for each method
plt.figure(figsize=(18, 5))

# Columns of the buffer dataframe that are skipped when plotting the raw
# points (presumably derived/summary values rather than individual wells).
summary_cols = {"fit", "fit_err", "mean", "sem", "Label"}

# NOTE: the loop leaves mixed_tit.params.bg_mth set to the last method tried.
for i, method in enumerate(methods, 1):
    mixed_tit.params.bg_mth = method

    plt.subplot(1, 3, i)
    df = mixed_tit.buffer.dataframes_nrm[1]

    # Plot raw buffer data
    for col in df.columns:
        if col not in summary_cols:
            plt.plot(mixed_tit.x, df[col], "o", alpha=0.3)

    # Plot fit results with a band of +/- fit_err around the fit line
    plt.plot(mixed_tit.x, df["fit"], "r-", lw=2, label="Fit")
    plt.fill_between(
        mixed_tit.x,
        df["fit"] - df["fit_err"],
        df["fit"] + df["fit_err"],
        color="r",
        alpha=0.2,
    )

    plt.title(f"Buffer {method} method")
    plt.xlabel("pH")
    plt.ylabel("Signal")
    # The "Fit" label is only rendered once a legend is requested.
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.show()

### Key Observations

1. **Metadata Handling**:
   - Files with different instrument settings (like gain) can be merged after normalization
   - The module automatically handles unit conversions and scaling

2. **Background Methods**:
   - `mean`: Simple average of buffer wells
   - `meansd`: More conservative estimate using mean ± 1SD
   - `fit`: Linear regression of buffer wells vs. pH

3. **Impact on Results**:
   - Different methods can significantly affect baseline correction
   - The `fit` method accounts for pH-dependent buffer effects
   - More conservative methods may preserve signal dynamics better

## Key Takeaways

1. **Modular Architecture**: The `prtecan` module processes data through well-defined stages
2. **Reproducible Workflows**: All processing steps are traceable and configurable
3. **Advanced Fitting**: Multiple fitting methods with automatic error propagation
4. **Quality Control**: Built-in tools for monitoring experimental conditions
5. **Scalability**: Handles both single experiments and high-throughput screens