In [1]:
"""
grouped_kinetics.py
~~~~~~~~~~~~~~~~~~~
Aggregate CHO fed-batch data **by Clone × sampling time (t_hr)** and compute
mean ± SD for every numeric column—including the latest balances, yields and
specific rates (q_Gln, q_Glu, q_rP).

Workflow
--------
1. Load `outputs/interval_kinetics.csv` generated by `interval_kinetics.py`.
2. Identify all numeric columns except `t_hr`.
3. Group data by **Clone × t_hr** and calculate:
   • Mean (suffix *_avg*)  
   • Standard deviation (suffix *_sd*)
4. Flatten the MultiIndex columns and reset the index.
5. Save the aggregated table to disk.

Inputs
------
outputs/interval_kinetics.csv  
required: Clone, t_hr, all kinetic columns produced by
`interval_kinetics.py` including the new IVCC_interval column (08 Aug 2025 release) .

Outputs
-------
outputs/results_agg_by_clone_time.csv  
(one row per Clone × time with mean and SD columns)

Author
------
Emiliano Balderas R. | 16 Jul 2025  
Last edit : 06 Aug 2025 — docstring update for new kinetics
"""

import pandas as pd
import numpy as np
from pathlib import Path
import os

# ───── Configuration ─────────────────────────────────────────────────── #
INPUT_FILE  = Path("outputs/interval_kinetics.csv")
OUTPUT_FILE = Path("outputs/results_agg_by_clone_time.csv")

# ───── Load data ─────────────────────────────────────────────────────── #
if not INPUT_FILE.exists():
    raise FileNotFoundError(
        f"❌ Input file not found:\n  {INPUT_FILE}\n"
        "Run the updated `interval_kinetics.py` first."
    )

df = pd.read_csv(INPUT_FILE)

# ───── Identify numeric columns (exclude t_hr) ───────────────────────── #
numeric_cols = (
    df.select_dtypes(include="number")
      .columns.difference(["t_hr"])
)

# ───── Group by Clone × t_hr and calculate mean ± SD ─────────────────── #
agg_df = (
    df.groupby(["Clone", "t_hr"], observed=True)[numeric_cols]
      .agg(["mean", "std"])
      .rename(columns={"mean": "avg", "std": "sd"})
)

# ───── Flatten MultiIndex column names ───────────────────────────────── #
agg_df.columns = [f"{var}_{stat}" for var, stat in agg_df.columns]

# ───── Reset index to restore columns ───────────────────────────────── #
agg_df = agg_df.reset_index()

# ───── Save result ───────────────────────────────────────────────────── #
OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
agg_df.to_csv(OUTPUT_FILE, index=False)

if __name__ == "__main__":
    print(f"✓ Aggregated rows: {agg_df.shape[0]}  |  columns: {agg_df.shape[1]}")
    print(f"✓ Saved to:\n  {OUTPUT_FILE}")


✓ Aggregated rows: 60  |  columns: 110
✓ Saved to:
  outputs\results_agg_by_clone_time.csv


In [2]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
plot_grouped.py
~~~~~~~~~~~~~~~
Mean ± SD visualisation of CHO fed-batch kinetics **grouped by Clone × Time**.

Workflow
--------
1. Load the aggregated table produced by `grouped_kinetics.py`
   (`outputs/results_agg_by_clone_time.csv`).
2. Generate three families of plots:
   • *Time-course* – line + error bars for raw variables  
   • *Kinetics*    – line + error bars for derived rates, yields, balances  
   • *Correlations* – scatter with x/y error bars (mean ± SD) between
     selected metric pairs
3. Save every figure as a high-resolution PNG in the directory tree below.

Inputs
------
outputs/results_agg_by_clone_time.csv  
(required columns: `Clone`, `t_hr`, plus `{metric}_avg` and `{metric}_sd`
for each metric defined in the code).

Outputs
-------
PNG files in `outputs/figures_agg/`
├── time/       # raw variables vs. time  
├── kinetics/  # derived metrics vs. time  
└── corr/       # metric-to-metric correlations

Author
------
Emiliano Balderas R. | 16 Jul 2025  
Last edit : 07 Aug 2025 — switch to rP_mg_L & safe legend
"""

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# ───── Configuration ─────────────────────────────────────────────────── #
CSV_PATH   = Path("outputs/results_agg_by_clone_time.csv")
FIGURE_DIR = Path("outputs/figures_agg")
SUBFOLDERS = ["time", "kinetics", "corr"]
FIGSIZE, DPI = (8, 6), 300
AXES_RECT  = [0.15, 0.15, 0.78, 0.78]
PALETTE    = "tab10"
RATE_SET   = {"mu", "q_G", "q_L", "q_Gln", "q_Glu", "q_rP"}

sns.set_style("whitegrid")

# ───── Load data ─────────────────────────────────────────────────────── #
if not CSV_PATH.exists():
    raise FileNotFoundError(
        f"❌ Aggregated file not found:\n  {CSV_PATH}\n"
        "Run `grouped_kinetics.py` first.")
agg_df = pd.read_csv(CSV_PATH)

# ───── Folders & palette ─────────────────────────────────────────────── #
for sub in SUBFOLDERS:
    (FIGURE_DIR / sub).mkdir(parents=True, exist_ok=True)

clones = agg_df["Clone"].unique().tolist()
COLOR  = dict(zip(clones, sns.color_palette(PALETTE, len(clones))))

# ───── Helpers ───────────────────────────────────────────────────────── #
def plot_line_with_error(ax, x, y, yerr, label, color):
    ax.errorbar(x, y, yerr=yerr, fmt="-o", capsize=3,
                lw=1.5, markersize=5, label=label, color=color)

def safe_legend(ax, **kwargs):
    h, _ = ax.get_legend_handles_labels()
    if h: ax.legend(**kwargs)

# ───── 1. Time-course ────────────────────────────────────────────────── #
PLOT_TIME = [
    ("VCD",     r'VCD (cells·mL$^{-1}$)',      "Viable Cell Density"),
    ("Glc_mM",  r'Glucose (mM)',               "Glucose Concentration"),
    ("Lac_mM",  r'Lactate (mM)',               "Lactate Concentration"),
    ("Gln_mM",  r'Glutamine (mM)',             "Glutamine Concentration"),
    ("Glu_mM",  r'Glutamate (mM)',             "Glutamate Concentration"),
    ("rP_mg_L", r'Protein (mg·L$^{-1}$)',      "Recombinant Protein"),  # ← name fixed
    ("Viab_pct",  r'Viability (%)',            "Cell Viability"),
    ("GFP_mean", r'GFP (a.u.)',                "GFP Mean Fluorescence"),
    ("TMRM_mean",r'TMRM (a.u.)',               "TMRM Mean Fluorescence"),
]

for var, ylab, title in PLOT_TIME:
    avg, sd = f"{var}_avg", f"{var}_sd"
    if {avg, sd}.difference(agg_df.columns): continue

    fig = plt.figure(figsize=FIGSIZE, dpi=DPI); ax = fig.add_axes(AXES_RECT)
    for cl in clones:
        g = agg_df[agg_df["Clone"] == cl]
        plot_line_with_error(ax, g["t_hr"], g[avg], g[sd], cl, COLOR[cl])

    ax.set(xlabel="Time (h)", ylabel=ylab, title=title, xlim=(0, None))
    safe_legend(ax, title="Clone")
    fig.savefig(FIGURE_DIR / "time" / f"{var}_avg_sd.png"); plt.close(fig)

print("✓ Time trends saved in ./outputs/figures_agg/time")

# ───── 2. Kinetic parameters ─────────────────────────────────────────── #
PLOT_KIN = [
    # Growth & IVCD
    ("mu",        r'μ (h$^{-1}$)',                      "Specific Growth Rate"),
    ("IVCD_tot",  r'IVCD (cells·h·mL$^{-1}$)',                    "Cumulative IVCD"),
    ("dX",        r'ΔX (cells)',                        "Net Cell Change"),
    # Balances
    ("dG",        r'ΔGlucose (mol)',                    "Net Glucose Consumption"),
    ("dL",        r'ΔLactate (mol)',                    "Net Lactate Production"),
    ("dQln",      r'ΔGlutamine (mol)',                  "Net Glutamine Consumption"),  # NEW
    ("dQlu",      r'ΔGlutamate (mol)',                  "Net Glutamate Production"),   # NEW
    ("dP",        r'ΔProtein (g)',                      "Net rP Accumulation"),        # NEW
    # Specific rates
    ("q_G",       r'q$_G$ (pmol·cell$^{-1}$·h$^{-1}$)', "Specific Glucose Consumption"),
    ("q_L",       r'q$_L$ (pmol·cell$^{-1}$·h$^{-1}$)', "Specific Lactate Production"),
    ("q_Gln",     r'q$_{Gln}$ (pmol·cell$^{-1}$·h$^{-1}$)',"Specific Glutamine Consumption"), # NEW
    ("q_Glu",     r'q$_{Glu}$ (pmol·cell$^{-1}$·h$^{-1}$)',"Specific Glutamate Production"),  # NEW
    ("q_rP",      r'q$_{rP}$ (pg·cell$^{-1}$·h$^{-1}$)',   "Specific rP Production"),         # NEW
    # Yields
    ("Y_XG",      r'Y$_{X/G}$ (cells·mol$^{-1}$)',      "Yield on Glucose"),
    ("Y_XL",      r'Y$_{X/L}$ (cells·mol$^{-1}$)',      "Yield on Lactate"),
    ("Y_XQln",    r'Y$_{X/Gln}$ (cells·mol$^{-1}$)',    "Yield on Glutamine"),               # NEW
    ("Y_XQlu",    r'Y$_{X/Glu}$ (cells·mol$^{-1}$)',    "Yield on Glutamate"),               # NEW
]

for var, ylab, title in PLOT_KIN:
    avg, sd = f"{var}_avg", f"{var}_sd"
    if {avg, sd}.difference(agg_df.columns): continue

    fig = plt.figure(figsize=FIGSIZE, dpi=DPI)
    ax  = fig.add_axes(AXES_RECT)
    for cl in clones:
        g = agg_df[(agg_df["Clone"] == cl) & (~agg_df[avg].isna())]
        if g.empty: continue
        plot_line_with_error(ax, g["t_hr"], g[avg], g[sd], cl, COLOR[cl])

    ax.set_xlabel("Time (h)")
    ax.set_ylabel(ylab)
    ax.set_title(title)
    ax.set_xlim(left=0)
    ax.legend(title="Clone")
    fig.savefig(FIGURE_DIR / "kinetics" / f"{var}_avg_sd.png")
    plt.close(fig)

print("✓ Kinetics saved in ./outputs/figures_agg/kinetics")

# ───── 3. Correlation plots (mean ± SD) ───────────────────────────────── #
PLOT_CORR = [
    # Growth vs rates
    ("mu",  "q_G",     r'μ (h$^{-1}$)',                     r'q$_G$ (pmol·cell$^{-1}$·h$^{-1}$)', "μ vs. q$_G$"),
    ("mu",  "q_L",     r'μ (h$^{-1}$)',                     r'q$_L$ (pmol·cell$^{-1}$·h$^{-1}$)', "μ vs. q$_L$"),
    ("mu",  "q_Gln",   r'μ (h$^{-1}$)',                     r'q$_{Gln}$ (pmol·cell$^{-1}$·h$^{-1}$)',"μ vs. q$_{Gln}$"), # NEW
    ("mu",  "q_Glu",   r'μ (h$^{-1}$)',                     r'q$_{Glu}$ (pmol·cell$^{-1}$·h$^{-1}$)',"μ vs. q$_{Glu}$"), # NEW
    ("mu",  "q_rP",    r'μ (h$^{-1}$)',                     r'q$_{rP}$ (pg·cell$^{-1}$·h$^{-1}$)', "μ vs. q$_{rP}$"),   # NEW
    # Rate-rate comparisons
    ("q_G", "q_L",     r'q$_G$ (pmol·cell$^{-1}$·h$^{-1}$)',r'q$_L$ (pmol·cell$^{-1}$·h$^{-1}$)', "q$_G$ vs. q$_L$"),
    ("q_Gln","q_Glu",  r'q$_{Gln}$ (pmol·cell$^{-1}$·h$^{-1}$)',r'q$_{Glu}$ (pmol·cell$^{-1}$·h$^{-1}$)',"q$_{Gln}$ vs. q$_{Glu}$"), # NEW
    ("q_G", "q_rP",    r'q$_G$ (pmol·cell$^{-1}$·h$^{-1}$)', r'q$_{rP}$ (pg·cell$^{-1}$·h$^{-1}$)',"q$_G$ vs. q$_{rP}$"),          # NEW
]

for x, y, xl, yl, title in PLOT_CORR:
    xm, xs, ym, ys = f"{x}_avg", f"{x}_sd", f"{y}_avg", f"{y}_sd"
    if {xm, xs, ym, ys}.difference(agg_df.columns): continue

    fig = plt.figure(figsize=FIGSIZE, dpi=DPI)
    ax  = fig.add_axes(AXES_RECT)
    for cl in clones:
        g = agg_df[agg_df["Clone"] == cl]
        ax.errorbar(g[xm], g[ym], xerr=g[xs], yerr=g[ys],
                    fmt="o", capsize=3, label=cl, color=COLOR[cl])

    ax.set_xlabel(xl)
    ax.set_ylabel(yl)
    ax.set_title(title)
    if x in RATE_SET: ax.set_xlim(left=0)          # MOD
    if y in RATE_SET: ax.set_ylim(bottom=0)        # MOD
    ax.legend(title="Clone")
    fig.savefig(FIGURE_DIR / "corr" / f"{x}_vs_{y}_avg_sd.png")
    plt.close(fig)

print("✓ Correlations saved in ./outputs/figures_agg/corr")


✓ Time trends saved in ./outputs/figures_agg/time


  ax.legend(title="Clone")


✓ Kinetics saved in ./outputs/figures_agg/kinetics
✓ Correlations saved in ./outputs/figures_agg/corr
