In [3]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
interval_kinetics.py
~~~~~~~~~~~~~~~~~~~~
Interval-to-interval kinetic analysis for CHO fed-batch cultures.

Workflow
--------
1. Load cleaned CHO fed-batch data from `data/data.csv` (skips metadata row).
2. Parse `is_post_feed` to identify pre- vs post-feed sampling points.
3. For each Clone × Rep:
   • Compute growth rate (μ, h⁻¹)
   • Estimate integrated viable cell density (IVCD, cell·h)
   • Calculate dX, dGlc, dLac, yields, and specific rates (qS)
4. Save enriched DataFrame to `outputs/interval_kinetics.csv`.

Outputs
-------
CSV with new kinetic columns in `./outputs/`.

Author
------
Emiliano Balderas R. | 16 Jul 2025
"""

import numpy as np
import pandas as pd
from pathlib import Path
import os

# ───── Configuration ───────────────────────────────────────────────────── #
# Input table and output location, both relative to the working directory.
DATA_FILE = Path("data") / "data.csv"
OUTFILE = Path("outputs") / "interval_kinetics.csv"

# Molar masses in g/mol, used for the g/L → mM conversions.
MM_GLUCOSE = 180.156
MM_LACTATE = 90.080

# Kinetic columns added to the DataFrame. The order matters: it is the order
# in which the per-interval results are written back into these columns.
KIN_COLS = [
    "mu",
    "IVCD_tot",
    "dX",
    "dG",
    "dL",
    "Y_XG",
    "Y_XL",
    "q_G",
    "q_L",
]

# ───── Load data ───────────────────────────────────────────────────────── #
if not DATA_FILE.exists():
    raise FileNotFoundError(f"❌ Input file not found:\n  {DATA_FILE}")

# Spellings of `is_post_feed` that mean "sample taken after feeding".
# "1.0" is accepted because pandas reads a 0/1 column containing blank cells
# as float, so a flagged cell stringifies to "1.0" rather than "1".
_POST_FEED_TOKENS = frozenset({"true", "t", "1", "1.0"})


def _is_post_feed_flag(value):
    """Return True when *value* spells a post-feed flag.

    The comparison ignores case and surrounding whitespace. Every other
    value yields False, including missing cells (they stringify to "nan"),
    so no prior ``fillna`` is needed.
    """
    return str(value).strip().lower() in _POST_FEED_TOKENS


df = (
    # skiprows=1 drops the first line of the file (metadata row), so the
    # second line is used as the header.
    pd.read_csv(DATA_FILE, skiprows=1)
      .assign(
          # Unparsable times become NaN instead of raising.
          t_hr  = lambda d: pd.to_numeric(d["t_hr"], errors="coerce"),
          # Replicates outside the declared categories (1, 2, 3) become NaN.
          Rep   = lambda d: pd.Categorical(
                     pd.to_numeric(d["Rep"], errors="coerce"),
                     categories=[1, 2, 3], ordered=True),
          Clone = lambda d: d["Clone"].astype("category"),
          # astype(str) turns missing notes into the literal string "nan".
          Notes = lambda d: d["Notes"].astype(str).str.strip(),
          # Dates are day/month/year; anything else becomes NaT.
          Date  = lambda d: pd.to_datetime(d["Date"], format="%d/%m/%Y", errors="coerce"),
          Timestamp = lambda d: d["Timestamp"].astype(str).str.strip(),
          # Normalise the flag to a plain boolean column.
          is_post_feed = lambda d: (
              d["is_post_feed"]
                .apply(_is_post_feed_flag)
                .astype(bool)
          ),
      )
      .sort_values(["Clone", "Rep", "t_hr"], ignore_index=True)
)

# ───── Unit conversions ─────────────────────────────────────────────────── #
# g/L → mM: divide by the molar mass (g/mol) and scale mol → mmol.
# mM → mol/mL: 1 mM = 1e-3 mol/L = 1e-6 mol/mL.
df = df.assign(
    Glc_mM=lambda d: d["Glc_g_L"] / MM_GLUCOSE * 1e3,
    Lac_mM=lambda d: d["Lac_g_L"] / MM_LACTATE * 1e3,
    Glucose_mol_mL=lambda d: d["Glc_mM"] * 1e-6,
    Lactate_mol_mL=lambda d: d["Lac_mM"] * 1e-6,
)

# Create the kinetic result columns up front, filled with NaN, so rows that
# never receive a computed interval stay empty.
df = df.assign(**{kin_col: np.nan for kin_col in KIN_COLS})

# ───── Kinetic calculations ─────────────────────────────────────────────── #
# Last time point (h) treated as batch phase: up to and including this time
# each sample is paired with the sample immediately before it; later pre-feed
# samples are paired with the most recent post-feed sample instead.
BATCH_END_H = 72


def _growth_rate(vcd_start, vcd_end, dt):
    """Return μ = (ln vcd_end − ln vcd_start) / dt.

    NaN is returned when either density is missing or not strictly positive,
    because the logarithm is undefined there and would otherwise put ±inf
    into the output table.
    """
    if not (vcd_start > 0 and vcd_end > 0):
        return np.nan
    return (np.log(vcd_end) - np.log(vcd_start)) / dt


for _, group in df.groupby(["Clone", "Rep"], observed=True, sort=False):
    # Sorting on is_post_feed as a second key guarantees that, at equal
    # t_hr, the pre-feed sample precedes the post-feed one, so the batch-phase
    # "previous row" pairing never depends on the row order of the input file.
    g = group.sort_values(["t_hr", "is_post_feed"]).reset_index()
    idx_df = g["index"]  # original row labels in df, used to write results back

    for i in range(1, len(g)):
        t1 = g.loc[i]

        if t1["t_hr"] <= BATCH_END_H:  # batch phase
            t0 = g.loc[i - 1]

        elif not t1["is_post_feed"]:  # pre-feed
            # Start the interval at the latest post-feed sample so the
            # balance does not span a feed addition.
            pre_feed = g[(g["t_hr"] < t1["t_hr"]) & g["is_post_feed"]]
            if pre_feed.empty:
                continue
            t0 = pre_feed.iloc[-1]

        else:  # post-feed → skip
            continue

        dt = t1["t_hr"] - t0["t_hr"]
        if dt <= 0:
            continue

        # Growth rate
        mu = _growth_rate(t0["VCD"], t1["VCD"], dt)

        # Total balances (concentration × volume at each end of the interval):
        # dX = cells gained, dG = glucose consumed, dL = lactate produced.
        dX = t1["VCD"] * t1["Vol_mL"] - t0["VCD"] * t0["Vol_mL"]
        dG = t0["Glucose_mol_mL"] * t0["Vol_mL"] - t1["Glucose_mol_mL"] * t1["Vol_mL"]
        dL = t1["Lactate_mol_mL"] * t1["Vol_mL"] - t0["Lactate_mol_mL"] * t0["Vol_mL"]

        # Yields (NaN when the denominator is exactly zero)
        Y_XG = dX / dG if dG else np.nan
        Y_XL = dX / dL if dL else np.nan

        # Integrated viable cell density: trapezoid of VCD over the interval,
        # scaled by the mean culture volume.
        ivc_mL   = ((t0["VCD"] + t1["VCD"]) / 2) * dt
        IVCD_tot = ivc_mL * ((t0["Vol_mL"] + t1["Vol_mL"]) / 2)

        # Specific rates; the 1e12 factor converts mol → pmol.
        q_G = (dG * 1e12) / IVCD_tot if IVCD_tot else np.nan
        q_L = (dL * 1e12) / IVCD_tot if IVCD_tot else np.nan

        # Results are stored on the row of the interval's end point.
        df.loc[idx_df[i], KIN_COLS] = [
            mu, IVCD_tot, dX, dG, dL, Y_XG, Y_XL, q_G, q_L
        ]

# ───── Save and summary ─────────────────────────────────────────────────── #
# Make sure the destination folder exists, then write the enriched table
# without the row index.
output_dir = OUTFILE.parent
output_dir.mkdir(parents=True, exist_ok=True)
df.to_csv(OUTFILE, index=False)

if __name__ == "__main__":
    # Number of rows that carry a non-missing growth rate.
    n_valid = df["mu"].notna().sum()
    summary_lines = (
        f"\n✓ Intervals analyzed: {n_valid}",
        f"✓ Kinetic file saved to:\n  {OUTFILE}",
    )
    for summary_line in summary_lines:
        print(summary_line)



✓ Intervals analyzed: 36
✓ Kinetic file saved to:
  outputs\interval_kinetics.csv


In [8]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
plot_raw.py
~~~~~~~~~~~
Generate per-sample scatter plots (Clone × Rep) for CHO fed-batch data.

Plots include:
• Time-course scatter plots (raw values)
• Kinetic parameter trends
• Correlation plots

Assumes `outputs/interval_kinetics.csv` has been generated by
`interval_kinetics.py`.

Outputs
-------
Figures saved in:
• outputs/figures_raw/time/
• outputs/figures_raw/kinetics/
• outputs/figures_raw/corr/

Author
------
Emiliano Balderas R. | 16 Jul 2025
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# ───── Configuration ───────────────────────────────────────────────────── #
# Input table and root folder for the generated figures.
CSV_PATH = Path("outputs") / "interval_kinetics.csv"
FIGURE_DIR = Path("outputs") / "figures_raw"
SUBFOLDERS = ["time", "kinetics", "corr"]  # one sub-folder per plot family

# Figure geometry and appearance shared by every plot.
FIGSIZE = (8, 6)                       # passed to plt.figure(figsize=...)
DPI = 300                              # passed to plt.figure(dpi=...)
PALETTE = "tab10"                      # seaborn palette used for clone colours
AXES_RECT = [0.15, 0.15, 0.78, 0.78]   # rect handed to fig.add_axes

# Marker symbol for each replicate number.
SHAPE_MAP = {
    1: "o",
    2: "s",
    3: "D",
}

sns.set_style("whitegrid")

# ───── Load data ───────────────────────────────────────────────────────── #
# Fail early with a readable message when the kinetics table is absent.
csv_missing = not CSV_PATH.exists()
if csv_missing:
    raise FileNotFoundError(f"❌ File not found:\n  {CSV_PATH}")

df = pd.read_csv(CSV_PATH)

# ───── Set up output folders ───────────────────────────────────────────── #
for figure_subdir in (FIGURE_DIR / name for name in SUBFOLDERS):
    figure_subdir.mkdir(parents=True, exist_ok=True)

# ───── Color palette by clone ──────────────────────────────────────────── #
# One palette colour per distinct clone, in order of first appearance.
clones = df["Clone"].unique().tolist()
colors = sns.color_palette(PALETTE, len(clones))
COLOR = {clone: shade for clone, shade in zip(clones, colors)}

# ───── Helper: scatter plot by clone × rep ────────────────────────────── #
def scatter_by_rep(ax, data, x, y):
    """Draw one scatter series per Clone × Rep combination on *ax*.

    Parameters
    ----------
    ax
        Target axes; its ``scatter`` and ``get_legend`` methods are used.
    data
        Table with "Clone" and "Rep" columns plus the columns named by
        *x* and *y*.
    x, y
        Names of the columns plotted on the horizontal and vertical axes.

    Notes
    -----
    The colour comes from the module-level ``COLOR`` mapping (keyed by
    clone) and the marker from ``SHAPE_MAP`` (keyed by replicate, falling
    back to "o"). Nothing is returned; the axes is modified in place.
    """
    point_style = dict(s=65, edgecolor="white", linewidth=0.4)

    for clone, clone_rows in data.groupby("Clone", observed=True, sort=False):
        for rep, rep_rows in clone_rows.groupby("Rep", observed=True, sort=False):
            xs, ys = rep_rows[x], rep_rows[y]
            series_color = COLOR[clone]
            series_marker = SHAPE_MAP.get(rep, "o")

            # A series is labelled only while the axes has no legend yet.
            if ax.get_legend() is None:
                series_label = f"{clone}-rep{rep}"
            else:
                series_label = ""

            ax.scatter(
                xs, ys,
                color=series_color,
                marker=series_marker,
                label=series_label,
                **point_style,
            )

# ───── 1. Raw time-course plots ───────────────────────────────────────── #
# Each entry: (column name, y-axis label, figure title).
PLOT_TIME = [
    ("VCD",       r'VCD (cells·mL$^{-1}$)',      "Viable Cell Density"),
    ("Viab_pct",  r'Viability (%)',              "Cell Viability"),
    ("Glc_mM",    r'Glucose (mM)',               "Glucose Concentration"),
    ("Lac_mM",    r'Lactate (mM)',               "Lactate Concentration"),
    ("Gln_mM",    r'Glutamine (mM)',             "Glutamine Concentration"),
    ("Glu_mM",    r'Glutamate (mM)',             "Glutamate Concentration"),
    ("GFP_mean",  r'GFP (a.u.)',                 "GFP Mean Fluorescence"),
    ("TMRM_mean", r'TMRM (a.u.)',                "TMRM Mean Fluorescence"),
]

time_dir = FIGURE_DIR / "time"

for var, ylab, title in PLOT_TIME:
    if var in df.columns:
        # One figure per variable, plotted against culture time.
        fig = plt.figure(figsize=FIGSIZE, dpi=DPI)
        ax = fig.add_axes(AXES_RECT)

        scatter_by_rep(ax, df, "t_hr", var)
        ax.set(xlabel="Time (h)", ylabel=ylab, title=title)
        ax.set_xlim(left=0)
        ax.legend(title="Clone–Rep", fontsize=8)

        fig.savefig(time_dir / f"{var}_raw.png")
        plt.close(fig)  # release the figure before the next one is built
    else:
        # Columns absent from the table are reported and skipped.
        print(f"⚠️  '{var}' not found; skipping.")

print("✓ Time trends saved in ./outputs/figures_raw/time")

# ───── 2. Kinetic parameters vs. time ───────────────────────────────────── #
# Each entry: (column name, y-axis label, figure title).
PLOT_KIN = [
    ("mu",        r'μ (h$^{-1}$)',                     "Specific Growth Rate"),
    ("IVCD_tot",  r'IVCD (cells·h)',                   "Integral Viable Cell Density"),
    ("dX",        r'ΔX (cells)',                       "Net Cell Change"),
    ("dG",        r'ΔGlucose (mol)',                   "Net Glucose Consumption"),
    ("dL",        r'ΔLactate (mol)',                   "Net Lactate Production"),
    ("q_G",       r'q$_G$ (pmol·cell$^{-1}$·h$^{-1}$)',"Specific Glucose Consumption"),
    ("q_L",       r'q$_L$ (pmol·cell$^{-1}$·h$^{-1}$)',"Specific Lactate Production"),
    ("Y_XG",      r'Y$_{X/G}$ (cells·mol$^{-1}$)',     "Yield on Glucose"),
    ("Y_XL",      r'Y$_{X/L}$ (cells·mol$^{-1}$)',     "Yield on Lactate"),
]

for var, ylab, title in PLOT_KIN:
    if var not in df.columns:
        # Report the skip instead of dropping the plot silently, so a missing
        # kinetic column is visible in the run log.
        print(f"⚠️  '{var}' not found; skipping.")
        continue

    fig = plt.figure(figsize=FIGSIZE, dpi=DPI)
    ax = fig.add_axes(AXES_RECT)

    # Kinetic values are plotted against the time of the row they are stored on.
    scatter_by_rep(ax, df, "t_hr", var)
    ax.set_xlabel("Time (h)")
    ax.set_ylabel(ylab)
    ax.set_title(title)
    ax.set_xlim(left=0)
    ax.legend(title="Clone–Rep", fontsize=8)

    fig.savefig(FIGURE_DIR / "kinetics" / f"{var}_raw.png")
    plt.close(fig)  # release the figure before the next one is built

print("✓ Kinetic plots saved in ./outputs/figures_raw/kinetics")

# ───── 3. Correlation plots ────────────────────────────────────────────── #
# Each entry: (x column, y column, x-axis label, y-axis label, figure title).
PAIR_CORR = [
    ("mu",  "q_G",        r'μ (h$^{-1}$)',                     r'q$_G$ (pmol·cell$^{-1}$·h$^{-1}$)', "μ vs. q$_G$"),
    ("mu",  "q_L",        r'μ (h$^{-1}$)',                     r'q$_L$ (pmol·cell$^{-1}$·h$^{-1}$)', "μ vs. q$_L$"),
    ("mu",  "GFP_mean",   r'μ (h$^{-1}$)',                     r'GFP (a.u.)',                       "μ vs. GFP"),
    ("mu",  "TMRM_mean",  r'μ (h$^{-1}$)',                     r'TMRM (a.u.)',                      "μ vs. TMRM"),
    ("q_G", "q_L",        r'q$_G$ (pmol·cell$^{-1}$·h$^{-1}$)',r'q$_L$ (pmol·cell$^{-1}$·h$^{-1}$)', "q$_G$ vs. q$_L$"),
]

# Variables whose axis starts at zero in these plots.
# NOTE(review): points with negative values of these variables fall outside
# the visible range — confirm that clipping is intended.
ZERO_ANCHORED = {"mu", "q_G", "q_L"}

for x, y, xl, yl, title in PAIR_CORR:
    missing = [col for col in (x, y) if col not in df.columns]
    if missing:
        # Report the skip instead of dropping the pair silently, so the final
        # "saved" message cannot hide a plot that was never produced.
        for col in missing:
            print(f"⚠️  '{col}' not found; skipping {x} vs. {y}.")
        continue

    fig = plt.figure(figsize=FIGSIZE, dpi=DPI)
    ax = fig.add_axes(AXES_RECT)

    scatter_by_rep(ax, df, x, y)
    ax.set_xlabel(xl)
    ax.set_ylabel(yl)
    ax.set_title(title)

    if x in ZERO_ANCHORED:
        ax.set_xlim(left=0)
    if y in ZERO_ANCHORED:
        ax.set_ylim(bottom=0)

    ax.legend(title="Clone–Rep", fontsize=8)

    fig.savefig(FIGURE_DIR / "corr" / f"{x}_vs_{y}_raw.png")
    plt.close(fig)  # release the figure before the next one is built

print("✓ Correlations saved in ./outputs/figures_raw/corr")


⚠️  'GFP_mean' not found; skipping.
⚠️  'TMRM_mean' not found; skipping.
✓ Time trends saved in ./outputs/figures_raw/time
✓ Kinetic plots saved in ./outputs/figures_raw/kinetics
✓ Correlations saved in ./outputs/figures_raw/corr
