<a href="https://colab.research.google.com/github/jamessutton600613-png/GC/blob/main/Untitled228.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# -*- coding: utf-8 -*-
# GQR-TDSE (Electron-only, 16-site core), batch over CIFs in /content
# - 40 fs total, dt = 0.02 fs (2000 steps)
# - Outputs plots and CSVs for each CIF and each "condition"
# - No protons (hydrogens) to keep it fast & clean
# ---------------------------------------------------------------

import sys, subprocess, importlib, os, json, math, glob
from pathlib import Path

# --- Ensure gemmi is available ---
try:
    import gemmi
except Exception:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "gemmi"])
    import gemmi

import numpy as np
from scipy.linalg import expm
import matplotlib.pyplot as plt

# -----------------------------
# User knobs
# -----------------------------
# Input/output locations. OUT_ROOT is created at import time (side effect).
IN_DIR = Path("/content")          # where CIF/mmCIF/PDBs live
OUT_ROOT = Path("/content/GQR_OUT")
OUT_ROOT.mkdir(parents=True, exist_ok=True)

# Time grid: N_STEPS = TOTAL_FS / DT_FS rounded to the nearest integer.
TOTAL_FS = 40.0                    # total simulation time [fs]
DT_FS = 0.02                       # time-step [fs]
N_STEPS = int(round(TOTAL_FS/DT_FS))
H_BAR = 6.582119569e-16            # eV*s
DT_S = DT_FS * 1e-15               # convert to seconds

# Base coupling model (electron)
T0 = 10.0                          # eV (baseline coupling scale)
BETA_E = 1.0                       # 1/Angstrom (decay constant for electron tunneling)

# Simple gating comb (distance resonances), modest to keep stable.
# COMB_CENTERS are the Gaussian centres and COMB_WIDTH their shared sigma, both
# compared against inter-site distances (presumably Angstrom - confirm).
COMB_CENTERS = [0.8, 1.1, 1.5, 2.1, 3.4]
COMB_WIDTH   = 0.18
# NOTE(review): DUTY_BASE is not referenced by the visible code; the
# per-condition "duty" values in CONDITIONS are what build_hamiltonian reads.
DUTY_BASE = 0.90

# -----------------------------
# "Conditions" (electron-only surrogates for solvent/isotope)
#   - These lightly tweak water-coupling & duty to emulate environment
#   - "duty" multiplies every off-diagonal coupling; "beta_scale_water"
#     scales BETA_E for any pair that involves a water site (index >= 10).
# -----------------------------
CONDITIONS = {
    "H2O": {"duty": 0.90, "beta_scale_water": 1.00},
    "D2O": {"duty": 0.88, "beta_scale_water": 1.05},
    "T2O": {"duty": 0.86, "beta_scale_water": 1.08},
    "H2S": {"duty": 0.92, "beta_scale_water": 0.95},  # slightly softer env
    "D2S": {"duty": 0.90, "beta_scale_water": 1.00},
    "T2S": {"duty": 0.88, "beta_scale_water": 1.04},
}

# -----------------------------
# Helpers
# -----------------------------
def read_structure_any(path: Path) -> gemmi.Structure:
    """Read a structure file with gemmi and apply a light, in-place cleanup.

    The file is parsed with ``gemmi.read_structure``; alternative
    conformations are removed, chain parts are merged and empty chains are
    dropped, in that order.

    Args:
        path: Location of the structure file (passed to gemmi as a string).

    Returns:
        The cleaned ``gemmi.Structure``.
    """
    structure = gemmi.read_structure(str(path))
    cleanup_steps = (
        structure.remove_alternative_conformations,
        structure.merge_chain_parts,
        structure.remove_empty_chains,
    )
    for step in cleanup_steps:
        step()
    return structure

def _atom_iter(struct: gemmi.Structure):
    """Yield ``(model, chain, residue, atom)`` for every atom in *struct*.

    The structure is walked model -> chain -> residue -> atom, lazily.
    """
    for model in struct:
        for chain in model:
            for residue in chain:
                yield from ((model, chain, residue, atom) for atom in residue)

def get_psii_core_16(struct: gemmi.Structure):
    """
    Pick a 16-site coordinate set around the Mn4Ca cluster of a structure.

    Site layout of the returned array:
      0-3   : four Mn atoms (the most compact 4-subset of all Mn found)
      4     : the Ca atom nearest to the centroid of those four Mn
      5-9   : five non-water O atoms lying within 2.4 A of at least two of
              the selected Mn (mu-oxo candidates), nearest to the Mn centroid
      10-11 : the two water oxygens nearest to the Mn4Ca centroid
      12-15 : the next-nearest water oxygens; when fewer than six waters are
              available, synthetic sites shifted along +x from the second
              water fill the remaining slots

    No hydrogens are included (electron-only model).

    Returns:
        (coords, labels): ``coords`` is an np.ndarray of shape (16, 3) and
        ``labels`` a list of 16 strings. ``(None, None)`` is returned when
        fewer than 4 Mn, no Ca, fewer than 5 bridging oxygens or fewer than
        2 water oxygens are found.
    """
    water_resnames = ("HOH", "WAT", "H2O", "DOD", "TIP3", "SOL")
    mn_atoms, ca_atoms, oxy_atoms, water_oxy = [], [], [], []

    # Bucket atoms by element; oxygens are split into water / non-water.
    for _, _, res, atom in _atom_iter(struct):
        el = atom.element.name.upper()
        pos = np.array([atom.pos.x, atom.pos.y, atom.pos.z], dtype=float)
        if el == "MN":
            mn_atoms.append((pos, res, atom))
        elif el == "CA":
            ca_atoms.append((pos, res, atom))
        elif el == "O":
            if res.name.upper() in water_resnames:
                water_oxy.append((pos, res, atom))
            else:
                oxy_atoms.append((pos, res, atom))

    if len(mn_atoms) < 4:
        return None, None

    # Brute-force the 4-subset of Mn with the smallest summed distance to its
    # own centroid (i.e. the most compact cluster).
    mn_positions = np.array([p for (p, _, _) in mn_atoms])
    best_idx = None
    best_score = float("inf")
    for idxs in _combinations(range(len(mn_positions)), 4):
        pts = mn_positions[list(idxs)]
        score = np.sum(np.linalg.norm(pts - pts.mean(axis=0), axis=1))
        if score < best_score:
            best_score = score
            best_idx = idxs
    if best_idx is None:
        # Only reachable when every score is non-finite (e.g. NaN coordinates).
        return None, None
    Mn_pos = np.array([mn_atoms[i][0] for i in best_idx])

    # Nearest Ca to the Mn centroid.
    if not ca_atoms:
        return None, None
    c_mn = Mn_pos.mean(axis=0)
    Ca_pos = min(ca_atoms, key=lambda t: np.linalg.norm(t[0] - c_mn))[0]

    # mu-oxo candidates: non-water oxygen within 2.4 A of two different Mn.
    mu_oxo = [
        entry for entry in oxy_atoms
        if np.count_nonzero(np.linalg.norm(Mn_pos - entry[0], axis=1) < 2.4) >= 2
    ]
    if len(mu_oxo) < 5:
        return None, None
    mu_oxo.sort(key=lambda t: np.linalg.norm(t[0] - c_mn))
    OX_pos = np.array([p for (p, _, _) in mu_oxo[:5]])

    # Waters ranked by distance to the Mn4Ca centroid.
    cluster_center = np.vstack([Mn_pos, Ca_pos[None, :]]).mean(axis=0)
    if len(water_oxy) < 2:
        return None, None
    water_oxy.sort(key=lambda t: np.linalg.norm(t[0] - cluster_center))
    W_pos = np.array([p for (p, _, _) in water_oxy[:2]])

    coords = np.vstack([Mn_pos, Ca_pos[None, :], OX_pos, W_pos])  # 4+1+5+2 = 12

    # Pad up to 16 sites with further waters, or synthetic sites when the
    # structure runs out of waters.
    extra_needed = 16 - coords.shape[0]
    others = water_oxy[2:]
    extras = []
    for i in range(extra_needed):
        if i < len(others):
            extras.append(others[i][0])
        else:
            # Offset is 0.05*(i+1): using 0.05*i gave a zero shift for i == 0,
            # i.e. a site exactly on top of the second water.
            extras.append(W_pos[-1] + np.array([0.05 * (i + 1), 0, 0]))
    if extras:
        coords = np.vstack([coords, np.array(extras)])

    if coords.shape[0] != 16:
        return None, None

    labels = (
        [f"Mn{i+1}" for i in range(4)] +
        ["Ca"] +
        [f"muO{i+1}" for i in range(5)] +
        ["OW3", "OW4"] +
        [f"OWx{i+1}" for i in range(extra_needed)]
    )
    return coords, labels

def _combinations(iterable, r):
    """Yield every r-length combination of *iterable* in lexicographic order.

    Thin generator wrapper around ``itertools.combinations`` (imported
    locally so the file's import block is untouched). Nothing is yielded when
    ``r`` exceeds the number of items; ``r == 0`` yields a single empty tuple.
    A negative ``r`` raises ``ValueError`` (raised by itertools).
    """
    from itertools import combinations

    yield from combinations(iterable, r)

# -----------------------------
# Hamiltonian pieces
# -----------------------------
def resonance_comb(r):
    """Return the distance-gating factor for separation *r* (always >= 1.0).

    One Gaussian of width COMB_WIDTH is evaluated at each centre in
    COMB_CENTERS; their mean (with a 1e-9 guard in the denominator) is scaled
    by 1.5 and added to 1.0.
    """
    two_sigma_sq = 2 * COMB_WIDTH**2
    gaussian_sum = sum(
        math.exp(-((r - center) ** 2) / two_sigma_sq) for center in COMB_CENTERS
    )
    return 1.0 + 1.5 * (gaussian_sum / (len(COMB_CENTERS) + 1e-9))

def build_hamiltonian(coords, condition):
    """
    Assemble the site Hamiltonian (eV) for the given coordinates.

    Diagonal: fixed site energies in the order 4 x Mn (2.0), Ca (0.7),
    5 x mu-oxo (1.0), 6 x water (0.2).
    Off-diagonal: T0 * exp(-beta * r) * resonance_comb(r) * duty, symmetric and
    real-valued. beta is BETA_E, multiplied by the condition's
    "beta_scale_water" whenever either site index is >= 10 (a water site).

    Args:
        coords: array of site positions, one row per site.
        condition: mapping with "duty" and "beta_scale_water" entries.

    Returns:
        complex128 array of shape (n, n), n being the number of rows of coords.
    """
    n_sites = coords.shape[0]
    hamiltonian = np.zeros((n_sites, n_sites), dtype=np.complex128)

    # indices: 0-3 Mn, 4 Ca, 5-9 mu-oxo, 10-15 waters (2 + 4 extras)
    site_energies = np.array(
        [2.0] * 4 + [0.7] + [1.0] * 5 + [0.2] * 6,
        dtype=float,
    )
    np.fill_diagonal(hamiltonian, site_energies)

    duty_factor = condition["duty"]
    water_beta = BETA_E * condition["beta_scale_water"]
    first_water = 10

    # All pairwise distances at once, then walk the strict upper triangle.
    separations = np.linalg.norm(coords[:, None, :] - coords[None, :, :], axis=2)
    for a, b in zip(*np.triu_indices(n_sites, k=1)):
        sep = separations[a, b]
        involves_water = a >= first_water or b >= first_water
        decay = water_beta if involves_water else BETA_E
        coupling = T0 * math.exp(-decay * sep) * resonance_comb(sep) * duty_factor
        hamiltonian[a, b] = hamiltonian[b, a] = coupling
    return hamiltonian

def propagate_tdse(H, psi, dt_s):
    """Advance *psi* by one step of length *dt_s* (seconds) under *H* (eV).

    Applies U = exp(-i H dt / hbar), computed with ``scipy.linalg.expm``.
    """
    phase_scale = dt_s / H_BAR
    propagator = expm(-1j * H * phase_scale)
    return propagator @ psi

# -----------------------------
# Simulation loop
# -----------------------------
def run_tdse(coords, labels, condition_name, out_dir: Path):
    """Propagate an electron started on site 0 and write CSV/PNG/JSON outputs.

    The Hamiltonian is built once for ``CONDITIONS[condition_name]``. Because
    it does not change during the run, the one-step propagator
    exp(-i H dt / hbar) is also computed once and reused for all N_STEPS
    steps, instead of re-evaluating the matrix exponential every step.
    Populations are sampled before each step, so the first sample is t = 0.

    Args:
        coords: site coordinates passed to ``build_hamiltonian``.
        labels: site labels (currently unused; kept for interface stability).
        condition_name: key into ``CONDITIONS``.
        out_dir: directory that receives ``populations_<cond>.csv``,
            ``populations_<cond>.png`` and ``summary_<cond>.json``; created
            if missing.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    H = build_hamiltonian(coords, CONDITIONS[condition_name])

    n = coords.shape[0]
    psi = np.zeros(n, dtype=np.complex128)
    psi[0] = 1.0  # start on Mn1

    # Time-independent H -> a single propagator serves every step.
    U = expm(-1j * H * (DT_S / H_BAR))

    t_axis_fs = np.arange(N_STEPS) * DT_FS
    # track water oxygen populations (sites 10..15)
    pop_water = np.zeros(N_STEPS, dtype=float)
    # also track OW3, OW4 if present (10, 11)
    pop_ow3 = np.zeros(N_STEPS, dtype=float)
    pop_ow4 = np.zeros(N_STEPS, dtype=float)

    for k in range(N_STEPS):
        p = np.abs(psi)**2
        pop_water[k] = p[10:16].sum()
        pop_ow3[k] = p[10] if n > 10 else 0.0
        pop_ow4[k] = p[11] if n > 11 else 0.0
        psi = U @ psi
        # Renormalise to suppress round-off drift of the norm.
        psi /= np.linalg.norm(psi)

    # Save CSV
    csv_path = out_dir / f"populations_{condition_name}.csv"
    np.savetxt(csv_path,
               np.column_stack([t_axis_fs, pop_water, pop_ow3, pop_ow4]),
               delimiter=",",
               header="time_fs,pop_water_total,pop_OW3,pop_OW4",
               comments="")

    # Plot
    plt.figure(figsize=(8,5))
    plt.plot(t_axis_fs, 100*pop_water, label="Water O (total)")
    plt.plot(t_axis_fs, 100*pop_ow3, '--', label="OW3")
    plt.plot(t_axis_fs, 100*pop_ow4, '--', label="OW4")
    plt.xlabel("Time (fs)")
    plt.ylabel("Population (%)")
    plt.title(f"GQR‚ÄìTDSE (electron-only) ‚Äî {condition_name}")
    plt.legend()
    plt.tight_layout()
    png_path = out_dir / f"populations_{condition_name}.png"
    plt.savefig(png_path, dpi=200)
    plt.close()

    # Small JSON summary
    summary = {
        "condition": condition_name,
        "N_STEPS": N_STEPS,
        "DT_fs": DT_FS,
        "TOTAL_fs": TOTAL_FS,
        "final_pop_water": float(pop_water[-1]),
        "peak_pop_water": float(pop_water.max())
    }
    with open(out_dir / f"summary_{condition_name}.json", "w", encoding="utf-8") as f:
        json.dump(summary, f, indent=2)

# -----------------------------
# Batch runner
# -----------------------------
def _structure_stem(path):
    """Return the file name of *path* without its structure-file suffixes.

    An optional trailing ".gz" is dropped first, then one trailing ".mmcif",
    ".cif" or ".pdb" (case-insensitive). Unlike a blanket ``str.replace``,
    this also handles "x.pdb.gz" and leaves ".cif" inside a name untouched.
    """
    name = path.name
    if name.lower().endswith(".gz"):
        name = name[:-3]
    for ext in (".mmcif", ".cif", ".pdb"):
        if name.lower().endswith(ext):
            return name[:-len(ext)]
    return name


def main():
    """Run every condition on every structure file found in IN_DIR.

    For each file: read it, pick the 16-site core, save the picked coordinates
    to ``OUT_ROOT/<stem>/core16_coords.xyz`` and run ``run_tdse`` once per
    entry of CONDITIONS into ``OUT_ROOT/<stem>/<condition>/``. Files that
    cannot be read or lack a recognisable core are reported and skipped; a
    failing condition is reported without aborting the batch.
    """
    files = []
    for pat in ("*.cif", "*.mmcif", "*.pdb", "*.cif.gz", "*.mmcif.gz", "*.pdb.gz"):
        files += glob.glob(str(IN_DIR / pat))
    files = sorted(set(files))

    print(f"Sim length: ~{TOTAL_FS:.1f} fs | steps: {N_STEPS} | dt={DT_FS:.3f} fs")
    print(f"üîé Found {len(files)} structure file(s) in {IN_DIR}")

    if not files:
        print("No CIF/mmCIF/PDB files found. Place files in /content and rerun.")
        return

    for idx, f in enumerate(files, 1):
        cif_path = Path(f)
        stem = _structure_stem(cif_path)
        print(f"\n=== [{idx}/{len(files)}] {cif_path.name} ===")

        try:
            st = read_structure_any(cif_path)
            coords, labels = get_psii_core_16(st)
            if coords is None:
                print("   ‚ùå Skipping: could not identify 16-site Mn4Ca/Œº-oxo/2√ówater core.")
                continue
            print(f"   -> 16-site coords OK: {coords.shape}")
        except Exception as e:
            # Batch boundary: report and move on to the next file.
            print(f"   ‚ùå Skipping (read/parse error): {e}")
            continue

        cif_out = OUT_ROOT / stem
        cif_out.mkdir(parents=True, exist_ok=True)

        # Save picked coordinates for auditing
        np.savetxt(cif_out / "core16_coords.xyz",
                   np.column_stack([coords, np.zeros((coords.shape[0],1))]),
                   fmt="%.6f",
                   header="x y z (√Ö); dummy fourth column")

        # Run all conditions
        for cond in CONDITIONS:
            out_dir = cif_out / cond
            print(f"   ‚Ä¢ {cond}")
            try:
                run_tdse(coords, labels, cond, out_dir)
            except Exception as e:
                print(f"     -> ‚ùå condition '{cond}' failed: {e}")

    print("\n‚úÖ Done. Outputs under:", OUT_ROOT)

# Kick it off
if __name__ == "__main__":
    main()

Sim length: ~40.0 fs | steps: 2000 | dt=0.020 fs
üîé Found 16 structure file(s) in /content

=== [1/16] 4IXQ.cif ===
   ‚ùå Skipping: could not identify 16-site Mn4Ca/Œº-oxo/2√ówater core.

=== [2/16] 6DHE.cif ===
   -> 16-site coords OK: (16, 3)
   ‚Ä¢ H2O
   ‚Ä¢ D2O
   ‚Ä¢ T2O
   ‚Ä¢ H2S
   ‚Ä¢ D2S
   ‚Ä¢ T2S

=== [3/16] 6W1U.cif ===
   -> 16-site coords OK: (16, 3)
   ‚Ä¢ H2O
   ‚Ä¢ D2O
   ‚Ä¢ T2O
   ‚Ä¢ H2S
   ‚Ä¢ D2S
   ‚Ä¢ T2S

=== [4/16] 6W1V.cif ===
   -> 16-site coords OK: (16, 3)
   ‚Ä¢ H2O
   ‚Ä¢ D2O
   ‚Ä¢ T2O
   ‚Ä¢ H2S
   ‚Ä¢ D2S
   ‚Ä¢ T2S

=== [5/16] 7RFY.cif ===
   ‚ùå Skipping: could not identify 16-site Mn4Ca/Œº-oxo/2√ówater core.

=== [6/16] 7RFZ.cif ===
   ‚ùå Skipping: could not identify 16-site Mn4Ca/Œº-oxo/2√ówater core.

=== [7/16] 8EZ5.cif ===
   -> 16-site coords OK: (16, 3)
   ‚Ä¢ H2O
   ‚Ä¢ D2O
   ‚Ä¢ T2O
   ‚Ä¢ H2S
   ‚Ä¢ D2S
   ‚Ä¢ T2S

=== [8/16] 8F4C.cif ===
   -> 16-site coords OK: (16, 3)
   ‚Ä¢ H2O
   ‚Ä¢ D2O
   ‚Ä¢ T2O
   ‚Ä¢ H2S
   ‚Ä¢ D2S
   ‚Ä¢ 

In [None]:
from google.colab import files
import shutil, os

# Zip the whole results directory next to itself, then hand the archive to the
# browser through Colab's download helper.
_gqr_out_dir = "/content/GQR_OUT"
shutil.make_archive(_gqr_out_dir, "zip", _gqr_out_dir)
files.download(_gqr_out_dir + ".zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import os, glob
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Results directory scanned for per-structure sub-folders.
ROOT = "GQR_OUT"   # change if needed

# Conditions & how we'll style them.
# "label" is the legend text, "ls" the line style and "lw" the line width that
# plot_group passes to matplotlib for the total-population trace.
PALETTES = {
    "H2O": {"label":"H$_2$O", "ls":"-",  "lw":2.2},
    "D2O": {"label":"D$_2$O", "ls":"--", "lw":2.0},
    "T2O": {"label":"T$_2$O", "ls":"-.", "lw":2.0},
    "H2S": {"label":"H$_2$S", "ls":"-",  "lw":2.2},
    "D2S": {"label":"D$_2$S", "ls":"--", "lw":2.0},
    "T2S": {"label":"T$_2$S", "ls":"-.", "lw":2.0},
}

# Comparison figures: key is the output file stem (<key>.png), value is the
# list of conditions overlaid in that figure.
GROUPS = {
    "combo_H2O_KIE": ["H2O","D2O","T2O"],
    "combo_H2S_vs_H2O": ["H2O","H2S"],
    "combo_H2S_KIE": ["H2S","D2S","T2S"],
    "combo_H2O_vs_T2S": ["H2O","T2S"],
}

def find_cif_stems(root):
    """Return the sorted names of result folders directly under *root*.

    A sub-directory qualifies when at least one of its condition folders
    (H2O, D2O, T2O, H2S, D2S, T2S) contains ``populations_<cond>.csv``.
    """
    conditions = ("H2O", "D2O", "T2O", "H2S", "D2S", "T2S")

    def has_results(folder):
        return any(
            (folder / cond / f"populations_{cond}.csv").exists()
            for cond in conditions
        )

    return sorted(
        entry.name
        for entry in Path(root).glob("*")
        if entry.is_dir() and has_results(entry)
    )

def load_condition_csv(cif_stem, cond):
    """Load one populations CSV and return it with canonical column names.

    The simulation writes the header
    ``time_fs,pop_water_total,pop_OW3,pop_OW4``; this loader previously looked
    only for ``P_OW_*`` names and failed with a KeyError. Each canonical
    column is now resolved from a list of aliases: first by case-insensitive
    equality, then by case-insensitive substring match.

    Args:
        cif_stem: name of the structure folder under ROOT.
        cond: condition name (sub-folder and file-name suffix).

    Returns:
        DataFrame with columns ``time_fs``, ``P_OW_total``, ``P_OW3``,
        ``P_OW4``, or None when the file is missing or a column cannot be
        resolved (a message is printed in the latter case).
    """
    csv_path = Path(ROOT) / cif_stem / cond / f"populations_{cond}.csv"
    if not csv_path.exists():
        return None
    df = pd.read_csv(csv_path)

    aliases_by_column = {
        "time_fs": ("time_fs", "time"),
        "P_OW_total": ("P_OW_total", "pop_water_total"),
        "P_OW3": ("P_OW3", "pop_OW3"),
        "P_OW4": ("P_OW4", "pop_OW4"),
    }
    lowered = [(col, str(col).lower()) for col in df.columns]

    def resolve(aliases):
        # Exact (case-insensitive) names win over substring matches.
        for alias in aliases:
            for col, low in lowered:
                if low == alias.lower():
                    return col
        for alias in aliases:
            for col, low in lowered:
                if alias.lower() in low:
                    return col
        return None

    picked = {}
    for canonical, aliases in aliases_by_column.items():
        source = resolve(aliases)
        if source is None:
            print(f"[{cif_stem}/{cond}] no column for {canonical}; "
                  f"found {list(df.columns)}")
            return None
        picked[canonical] = df[source]

    # Only keep what we need, under the canonical names.
    return pd.DataFrame(picked)

def plot_group(cif_stem, group_name, cond_list):
    """Overlay the listed conditions in one figure and save it as a PNG.

    The figure goes to ``ROOT/<cif_stem>/combined/<group_name>.png``. For each
    condition the total water-oxygen population is drawn in the condition's
    style, with the OW3 and OW4 traces as faint dotted lines. Conditions
    without a loadable CSV are reported and skipped.

    Returns:
        Path of the saved figure, or None when no condition could be plotted.
    """
    outdir = Path(ROOT) / cif_stem / "combined"
    outdir.mkdir(parents=True, exist_ok=True)
    figfile = outdir / f"{group_name}.png"

    plt.figure(figsize=(7.6, 5.2))
    n_plotted = 0

    for cond in cond_list:
        df = load_condition_csv(cif_stem, cond)
        if df is None:
            print(f"[{cif_stem}] Missing: {cond} ‚Üí skipping in {group_name}")
            continue
        style = PALETTES.get(cond, {"label":cond, "ls":"-", "lw":2.0})

        # Main trace: total OW population, in percent.
        plt.plot(df["time_fs"], df["P_OW_total"]*100.0,
                 linestyle=style["ls"], linewidth=style["lw"], label=style["label"])
        # Context traces: individual waters, unlabelled.
        for sub_column in ("P_OW3", "P_OW4"):
            plt.plot(df["time_fs"], df[sub_column]*100.0,
                     linestyle=":", linewidth=1.2, alpha=0.5)
        n_plotted += 1

    if n_plotted == 0:
        plt.close()
        return None

    plt.title(f"{cif_stem}: {group_name.replace('_',' ')}")
    plt.xlabel("Time (fs)")
    plt.ylabel("Population on water oxygens (%)")
    plt.grid(True, linestyle="--", alpha=0.35)
    plt.legend(title="Condition", ncol=3, frameon=False)
    plt.tight_layout()
    plt.savefig(figfile, dpi=300)
    plt.close()
    return figfile

def write_latex_snippet(cif_stem, made):
    """Write ``ROOT/<cif_stem>/combined/figure_snippet.tex``.

    The snippet is a grid of minipages, two per row, one for each comparison
    PNG that exists in the ``combined`` folder; panels whose PNG is missing
    are left out so the snippet never references a non-existent file. Nothing
    is written when no panel exists.

    The text is assembled line by line rather than with ``%`` formatting: the
    previous template contained a literal LaTeX ``%`` comment, which made the
    ``%`` operator raise ValueError.

    Args:
        cif_stem: name of the structure folder under ROOT.
        made: figures produced by the caller (unused; kept so existing
            callers keep working).
    """
    outdir = Path(ROOT) / cif_stem / "combined"
    texfile = outdir / "figure_snippet.tex"

    captions = [
        ("combo_H2O_KIE", r"H$_2$O KIE: H$_2$O/D$_2$O/T$_2$O"),
        ("combo_H2S_vs_H2O", r"H$_2$S vs H$_2$O"),
        ("combo_H2S_KIE", r"H$_2$S KIE: H$_2$S/D$_2$S/T$_2$S"),
        ("combo_H2O_vs_T2S", r"H$_2$O vs T$_2$S (max contrast)"),
    ]
    # Only include panels that exist
    panels = [
        (outdir / f"{stem}.png", caption)
        for stem, caption in captions
        if (outdir / f"{stem}.png").exists()
    ]
    if not panels:
        return

    lines = [
        "",
        f"% Auto-generated panel for {cif_stem} "
        "(place inside your figure env or \\input directly)",
        r"\noindent",
        r"\begin{minipage}{\linewidth}\centering",
    ]
    last = len(panels) - 1
    for i, (png, caption) in enumerate(panels):
        if i == last:
            closer = ""
        elif i % 2 == 0:
            closer = r"\hfill"       # left panel: push the next one right
        else:
            closer = r"\\[0.5em]"    # right panel: start a new row
        lines += [
            r"  \begin{minipage}{0.49\linewidth}\centering",
            # Forward slashes keep the path valid for LaTeX on every OS.
            r"    \includegraphics[width=\linewidth]{" + png.as_posix() + r"}\\[-0.25em]",
            r"    \small " + caption,
            r"  \end{minipage}" + closer,
        ]
    lines += [r"\end{minipage}", ""]

    with open(texfile, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    print(f"[{cif_stem}] wrote LaTeX snippet ‚Üí {texfile}")

def main():
    """Build every comparison figure for every result folder under ROOT.

    A LaTeX snippet is written for each folder that produced at least one
    figure.
    """
    cif_stems = find_cif_stems(ROOT)
    if not cif_stems:
        print("No CIF folders found under", ROOT)
        return
    print("CIFs:", cif_stems)

    for stem in cif_stems:
        attempted = (plot_group(stem, name, conds) for name, conds in GROUPS.items())
        figures = [fig for fig in attempted if fig is not None]
        if figures:
            write_latex_snippet(stem, figures)

if __name__ == "__main__":
    main()

CIFs: ['6DHE', '6W1U', '6W1V', '8EZ5', '8F4C', '8F4D', '8F4E', '8F4F', '8F4G', '8F4H', '8F4I', '8F4J', '8F4K']


KeyError: "['P_OW_total'] not in index"

<Figure size 760x520 with 0 Axes>

In [None]:
import os
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

# Results directory scanned for per-structure sub-folders.
ROOT = "GQR_OUT"   # change if needed

# Per-condition plot style: "label" is the legend text, "ls" the line style and
# "lw" the line width used by plot_group for the total-population trace.
PALETTES = {
    "H2O": {"label":"H$_2$O", "ls":"-",  "lw":2.2},
    "D2O": {"label":"D$_2$O", "ls":"--", "lw":2.0},
    "T2O": {"label":"T$_2$O", "ls":"-.", "lw":2.0},
    "H2S": {"label":"H$_2$S", "ls":"-",  "lw":2.2},
    "D2S": {"label":"D$_2$S", "ls":"--", "lw":2.0},
    "T2S": {"label":"T$_2$S", "ls":"-.", "lw":2.0},
}

# Comparison figures: key is the output file stem (<key>.png), value is the
# list of conditions overlaid in that figure.
GROUPS = {
    "combo_H2O_KIE": ["H2O","D2O","T2O"],
    "combo_H2S_vs_H2O": ["H2O","H2S"],
    "combo_H2S_KIE": ["H2S","D2S","T2S"],
    "combo_H2O_vs_T2S": ["H2O","T2S"],
}

# --- flexible column inference helpers ---

# Accepted names for the time column, in priority order.
TIME_CANDIDATES = [
    "time_fs", "t_fs", "time", "t", "time_ps", "t_ps", "time_as", "t_as"
]

# Accepted names for the population columns, in priority order. The trailing
# "pop_*" entries are the names the simulation step writes in its CSV header
# ("time_fs,pop_water_total,pop_OW3,pop_OW4"); without them the per-water
# columns were never recognised.
TOTAL_CANDIDATES = [
    "P_OW_total","P_total_OW","P_WATER_OXYGENS","P_water_oxygens",
    "P_OW","P_OWsum","P_OW_all","P_water_total","P_total_water_oxygens",
    "pop_water_total",
]

OW3_CANDIDATES = ["P_OW3","P_OW_3","P_W3","P_OW3_pop","P_water_OW3","pop_OW3"]
OW4_CANDIDATES = ["P_OW4","P_OW_4","P_W4","P_OW4_pop","P_water_OW4","pop_OW4"]

def find_first_present(df, candidates):
    """Return the column of *df* matching the highest-priority candidate.

    Candidates are tried in order, first by exact name and then by
    case-insensitive equality. The column's own spelling is returned; None
    when nothing matches.
    """
    for cand in candidates:
        if cand in df.columns:
            return cand
    # Second pass: same priority order, ignoring case.
    for cand in candidates:
        wanted = cand.lower()
        for col in df.columns:
            if col.lower() == wanted:
                return col
    return None

def coerce_time_to_fs(df, time_col):
    """Return column *time_col* of *df* as floats in femtoseconds.

    The unit is taken from the column name when it contains "fs", "ps" or
    "as" (checked in that order). An explicit "fs" is returned unchanged; it
    used to fall through to the magnitude heuristic, which multiplied a
    ``time_fs`` column by 1000 whenever its maximum was below 2.

    For names without a unit the scale is guessed from the largest value:
    below 2 is treated as ps, between 2e3 and 2e6 as attoseconds, anything
    else as fs.
    """
    s = df[time_col].astype(float)
    name = time_col.lower()
    # Explicit units in the name take precedence over any guess.
    if "fs" in name:
        return s
    if "ps" in name:
        return s * 1000.0         # ps -> fs
    if "as" in name:
        return s * 1e-3           # as -> fs
    # Otherwise guess by scale.
    mx = s.max()
    if mx < 2.0:
        return s * 1000.0         # ps-ish -> fs
    if mx > 2e3 and mx < 2e6:
        return s * 1e-3           # as-ish -> fs
    return s                      # assume fs

def load_condition_csv(cif_stem, cond):
    """Load one populations CSV into a frame with canonical column names.

    The time column is located via TIME_CANDIDATES and converted to fs. The
    OW3/OW4 columns are optional. The total is taken from TOTAL_CANDIDATES,
    else from OW3 + OW4, else from the first column whose name contains
    "total" or "sum".

    Returns:
        DataFrame with ``time_fs``, ``P_OW_total``, ``P_OW3``, ``P_OW4``
        (the last two filled with ``pd.NA`` when absent), or None when the
        file is missing or the time/total column cannot be determined.
    """
    csv_path = Path(ROOT) / cif_stem / cond / f"populations_{cond}.csv"
    if not csv_path.exists():
        return None

    df = pd.read_csv(csv_path)

    # Time axis is mandatory.
    tcol = find_first_present(df, TIME_CANDIDATES)
    if tcol is None:
        print(f"[{cif_stem}/{cond}] No time column found ‚Üí skip.")
        return None
    time_fs = coerce_time_to_fs(df, tcol)

    # Per-water columns are optional.
    ow3_col = find_first_present(df, OW3_CANDIDATES)
    ow4_col = find_first_present(df, OW4_CANDIDATES)
    ow3_series = df[ow3_col].astype(float) if ow3_col else None
    ow4_series = df[ow4_col].astype(float) if ow4_col else None

    # Total: explicit column, then OW3 + OW4, then a "total"/"sum" look-alike.
    tot_col = find_first_present(df, TOTAL_CANDIDATES)
    if tot_col:
        total_series = df[tot_col].astype(float)
    elif ow3_series is not None and ow4_series is not None:
        total_series = ow3_series + ow4_series
    else:
        lookalike = next(
            (c for c in df.columns if "total" in c.lower() or "sum" in c.lower()),
            None,
        )
        if lookalike is None:
            print(f"[{cif_stem}/{cond}] No P_OW_total/OW3/OW4 columns ‚Üí skip.")
            return None
        total_series = df[lookalike].astype(float)

    out = pd.DataFrame({"time_fs": time_fs, "P_OW_total": total_series})
    for column, series in (("P_OW3", ow3_series), ("P_OW4", ow4_series)):
        out[column] = series if series is not None else pd.NA
    return out

def find_cif_stems(root):
    """Return sorted names of sub-folders of *root* that hold results.

    A folder counts when ``<folder>/<cond>/populations_<cond>.csv`` exists for
    at least one of the six known conditions.
    """
    known_conditions = ["H2O", "D2O", "T2O", "H2S", "D2S", "T2S"]
    found = []
    for folder in Path(root).glob("*"):
        if not folder.is_dir():
            continue
        csv_files = (folder / c / f"populations_{c}.csv" for c in known_conditions)
        if any(csv_file.exists() for csv_file in csv_files):
            found.append(folder.name)
    found.sort()
    return found

def plot_group(cif_stem, group_name, cond_list):
    """Overlay the listed conditions in one figure and save it as a PNG.

    Output goes to ``ROOT/<cif_stem>/combined/<group_name>.png``. The total
    water-oxygen population is drawn per condition; OW3/OW4 traces are added
    as faint dotted lines when the loaded frame holds values for them.

    Returns:
        Path of the saved figure, or None when no condition could be loaded.
    """
    outdir = Path(ROOT) / cif_stem / "combined"
    outdir.mkdir(parents=True, exist_ok=True)
    figfile = outdir / f"{group_name}.png"

    plt.figure(figsize=(7.6, 5.2))
    drew_something = False

    for cond in cond_list:
        df = load_condition_csv(cif_stem, cond)
        if df is None:
            print(f"[{cif_stem}] Missing/invalid: {cond} ‚Üí skipping in {group_name}")
            continue
        style = PALETTES.get(cond, {"label":cond, "ls":"-", "lw":2.0})

        plt.plot(df["time_fs"], df["P_OW_total"]*100.0,
                 linestyle=style["ls"], linewidth=style["lw"], label=style["label"])
        # Individual waters only when the column is not entirely NA.
        for sub_column in ("P_OW3", "P_OW4"):
            if df[sub_column].notna().any():
                plt.plot(df["time_fs"], df[sub_column]*100.0,
                         linestyle=":", linewidth=1.1, alpha=0.45)
        drew_something = True

    if not drew_something:
        plt.close()
        return None

    plt.title(f"{cif_stem}: {group_name.replace('_',' ')}")
    plt.xlabel("Time (fs)")
    plt.ylabel("Population on water oxygens (%)")
    plt.grid(True, linestyle="--", alpha=0.35)
    plt.legend(title="Condition", ncol=3, frameon=False)
    plt.tight_layout()
    plt.savefig(figfile, dpi=300)
    plt.close()
    return figfile

def write_latex_snippet(cif_stem):
    """Write ``ROOT/<cif_stem>/combined/figure_snippet.tex``.

    The snippet is a grid of minipages, two per row, one for each comparison
    PNG present in the ``combined`` folder; missing panels are omitted so the
    snippet never references a non-existent file. Nothing is written when no
    panel exists.

    The text is assembled line by line rather than with ``%`` formatting: the
    previous template held a literal LaTeX ``%`` comment, so the ``%``
    operator raised "ValueError: unsupported format character 'A'".
    """
    outdir = Path(ROOT) / cif_stem / "combined"
    texfile = outdir / "figure_snippet.tex"

    captions = {
        "combo_H2O_KIE": r"H$_2$O KIE: H$_2$O/D$_2$O/T$_2$O",
        "combo_H2S_vs_H2O": r"H$_2$S vs H$_2$O",
        "combo_H2S_KIE": r"H$_2$S KIE: H$_2$S/D$_2$S/T$_2$S",
        "combo_H2O_vs_T2S": r"H$_2$O vs T$_2$S (max contrast)",
    }
    panels = [
        (outdir / f"{name}.png", caption)
        for name, caption in captions.items()
        if (outdir / f"{name}.png").exists()
    ]
    if not panels:
        return

    lines = [
        "",
        f"% Auto-generated for {cif_stem}",
        r"\noindent",
        r"\begin{minipage}{\linewidth}\centering",
    ]
    for position, (png, caption) in enumerate(panels):
        if position == len(panels) - 1:
            closer = ""
        elif position % 2 == 0:
            closer = r"\hfill"       # left panel of a row
        else:
            closer = r"\\[0.5em]"    # right panel: break to the next row
        lines.extend([
            r"  \begin{minipage}{0.49\linewidth}\centering",
            # Forward slashes keep the path valid for LaTeX on every OS.
            r"    \includegraphics[width=\linewidth]{" + png.as_posix() + r"}\\[-0.25em]",
            r"    \small " + caption,
            r"  \end{minipage}" + closer,
        ])
    lines.extend([r"\end{minipage}", ""])

    with open(texfile, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    print(f"[{cif_stem}] wrote LaTeX snippet ‚Üí {texfile}")

def main():
    """Plot every comparison group for every result folder under ROOT.

    A LaTeX snippet is written for each folder where at least one group
    produced a figure.
    """
    cif_stems = find_cif_stems(ROOT)
    print("CIFs:", cif_stems)
    for stem in cif_stems:
        # Materialise the list so every group is plotted before checking.
        results = [plot_group(stem, name, conds) for name, conds in GROUPS.items()]
        if any(figure is not None for figure in results):
            write_latex_snippet(stem)

if __name__ == "__main__":
    main()

CIFs: ['6DHE', '6W1U', '6W1V', '8EZ5', '8F4C', '8F4D', '8F4E', '8F4F', '8F4G', '8F4H', '8F4I', '8F4J', '8F4K']


ValueError: unsupported format character 'A' (0x41) at index 3

In [None]:
# ==============================================================
# GQR_OUT multi-CIF comparison visualizer
#   ‚Ä¢ builds 4 comparison plots per CIF
#   ‚Ä¢ auto-detects time/unit and population columns
#   ‚Ä¢ writes LaTeX 2√ó2 minipage panel for easy inclusion
# ==============================================================

import os
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

# Results directory scanned for per-structure sub-folders.
ROOT = "GQR_OUT"   # adjust if your results directory differs

# --- Plot styles ---
# "label" is the legend text, "ls" the line style and "lw" the line width used
# by plot_group for the total-population trace of each condition.
PALETTES = {
    "H2O": {"label":"H$_2$O", "ls":"-",  "lw":2.2},
    "D2O": {"label":"D$_2$O", "ls":"--", "lw":2.0},
    "T2O": {"label":"T$_2$O", "ls":"-.", "lw":2.0},
    "H2S": {"label":"H$_2$S", "ls":"-",  "lw":2.2},
    "D2S": {"label":"D$_2$S", "ls":"--", "lw":2.0},
    "T2S": {"label":"T$_2$S", "ls":"-.", "lw":2.0},
}

# --- Which comparisons to plot ---
# Key is the output file stem (<key>.png); value lists the conditions overlaid.
GROUPS = {
    "combo_H2O_KIE": ["H2O","D2O","T2O"],
    "combo_H2S_vs_H2O": ["H2O","H2S"],
    "combo_H2S_KIE": ["H2S","D2S","T2S"],
    "combo_H2O_vs_T2S": ["H2O","T2S"],
}

# --- column name guesses ---
# Accepted names for the time column, in priority order.
TIME_CANDIDATES = [
    "time_fs","t_fs","time","t","time_ps","t_ps","time_as","t_as"
]
# Accepted names for the population columns, in priority order. The trailing
# "pop_*" entries are the names the simulation step writes in its CSV header
# ("time_fs,pop_water_total,pop_OW3,pop_OW4"); without them no population
# column was recognised and every CSV was skipped.
TOTAL_CANDIDATES = [
    "P_OW_total","P_total_OW","P_WATER_OXYGENS","P_water_oxygens",
    "P_OW","P_OWsum","P_OW_all","P_water_total","P_total_water_oxygens",
    "pop_water_total",
]
OW3_CANDIDATES = ["P_OW3","P_OW_3","P_W3","P_OW3_pop","P_water_OW3","pop_OW3"]
OW4_CANDIDATES = ["P_OW4","P_OW_4","P_W4","P_OW4_pop","P_water_OW4","pop_OW4"]

# --------------------------------------------------------------
# Helper functions
# --------------------------------------------------------------

def find_first_present(df, candidates):
    """Return a column of *df* that matches one of *candidates*, else None.

    An exact match wins, with candidates tried in order. Failing that, the
    first column (in column order) equal to any candidate ignoring case is
    returned.
    """
    columns = df.columns
    exact = next((cand for cand in candidates if cand in columns), None)
    if exact is not None:
        return exact

    lowered_candidates = [cand.lower() for cand in candidates]
    for col in columns:
        if col.lower() in lowered_candidates:
            return col
    return None

def coerce_time_to_fs(df, time_col):
    """Convert any reasonable time column to femtoseconds.

    The unit is taken from the column name when it contains "fs", "ps" or
    "as" (checked in that order). An explicit "fs" is returned unchanged; it
    used to fall through to the magnitude heuristic, which multiplied a
    ``time_fs`` column by 1000 whenever its maximum was below 2.

    For names without a unit the scale is guessed from the largest value:
    below 2 is treated as ps, between 2e3 and 2e6 as attoseconds, anything
    else as fs.
    """
    s = df[time_col].astype(float)
    name = time_col.lower()
    # Explicit units in the name take precedence over any guess.
    if "fs" in name:
        return s
    if "ps" in name:
        return s * 1000.0
    if "as" in name:
        return s * 1e-3
    mx = s.max()
    if mx < 2.0:
        return s * 1000.0    # probably ps
    if 2e3 < mx < 2e6:
        return s * 1e-3      # probably as
    return s

def load_condition_csv(cif_stem, cond):
    """Load one populations CSV into a frame with canonical column names.

    The time column is located via TIME_CANDIDATES and converted to fs; the
    OW3/OW4 columns are optional; the total comes from TOTAL_CANDIDATES or,
    failing that, from OW3 + OW4.

    Returns:
        DataFrame with ``time_fs``, ``P_OW_total``, ``P_OW3``, ``P_OW4``
        (the last two filled with ``pd.NA`` when absent), or None when the
        file is missing or the time/total column cannot be determined.
    """
    csv_path = Path(ROOT) / cif_stem / cond / f"populations_{cond}.csv"
    if not csv_path.exists():
        return None

    df = pd.read_csv(csv_path)

    # Mandatory time axis.
    tcol = find_first_present(df, TIME_CANDIDATES)
    if tcol is None:
        print(f"[{cif_stem}/{cond}] no time column ‚Üí skip")
        return None
    time_fs = coerce_time_to_fs(df, tcol)

    # Optional per-water populations.
    ow3_col = find_first_present(df, OW3_CANDIDATES)
    ow4_col = find_first_present(df, OW4_CANDIDATES)
    ow3_series = None if not ow3_col else df[ow3_col].astype(float)
    ow4_series = None if not ow4_col else df[ow4_col].astype(float)

    # Total: explicit column preferred, otherwise the sum of both waters.
    tot_col = find_first_present(df, TOTAL_CANDIDATES)
    have_both_waters = (ow3_series is not None) and (ow4_series is not None)
    if not tot_col and not have_both_waters:
        print(f"[{cif_stem}/{cond}] missing OW totals ‚Üí skip")
        return None
    total_series = df[tot_col].astype(float) if tot_col else ow3_series + ow4_series

    out = pd.DataFrame({"time_fs": time_fs, "P_OW_total": total_series})
    for column, series in (("P_OW3", ow3_series), ("P_OW4", ow4_series)):
        out[column] = series if series is not None else pd.NA
    return out

def find_cif_stems(root):
    """List, in sorted order, the sub-folders of *root* that contain results.

    A folder is kept when one of its H2O/D2O/T2O/H2S/D2S/T2S sub-folders
    holds the matching ``populations_<cond>.csv``.
    """
    def holds_results(folder):
        for cond in ["H2O", "D2O", "T2O", "H2S", "D2S", "T2S"]:
            if (folder / cond / f"populations_{cond}.csv").exists():
                return True
        return False

    candidates = (p for p in Path(root).glob("*") if p.is_dir())
    return sorted(p.name for p in candidates if holds_results(p))

# --------------------------------------------------------------
# Plotting and LaTeX snippet generation
# --------------------------------------------------------------

def plot_group(cif_stem, group_name, cond_list):
    """Overlay the water-oxygen populations of several conditions in one PNG.

    For every condition in *cond_list* that ``load_condition_csv`` can load,
    the total is drawn with the condition's ``PALETTES`` style and the OW3/OW4
    components (when they hold any data) as faint dotted lines.

    Returns the path of the saved figure
    (``ROOT/<cif_stem>/combined/<group_name>.png``), or ``None`` when no
    condition could be loaded.
    """
    combined_dir = Path(ROOT) / cif_stem / "combined"
    combined_dir.mkdir(parents=True, exist_ok=True)
    target = combined_dir / f"{group_name}.png"

    plt.figure(figsize=(7.6,5.2))
    n_drawn = 0

    for cond in cond_list:
        data = load_condition_csv(cif_stem, cond)
        if data is None:
            print(f"[{cif_stem}] missing {cond} for {group_name}")
            continue

        look = PALETTES.get(cond, {"label":cond, "ls":"-", "lw":2.0})
        t = data["time_fs"]
        # Populations are scaled by 100 to match the percent axis label.
        plt.plot(t, data["P_OW_total"]*100.0,
                 linestyle=look["ls"], linewidth=look["lw"], label=look["label"])
        for part in ("P_OW3", "P_OW4"):
            if data[part].notna().any():
                plt.plot(t, data[part]*100.0, ":", lw=1.1, alpha=0.45)
        n_drawn += 1

    if n_drawn == 0:
        # Nothing was drawn: discard the empty figure.
        plt.close()
        return None

    pretty_name = group_name.replace('_',' ')
    plt.title(f"{cif_stem}: {pretty_name}")
    plt.xlabel("Time (fs)")
    plt.ylabel("Population on water oxygens (%)")
    plt.grid(True, linestyle="--", alpha=0.35)
    plt.legend(title="Condition", ncol=3, frameon=False)
    plt.tight_layout()
    plt.savefig(target, dpi=300)
    plt.close()
    return target

def write_latex_snippet(cif_stem):
    """Write a two-column LaTeX panel snippet under ROOT/<cif_stem>/combined.

    The snippet (``figure_snippet.tex``) contains one captioned minipage per
    combined PNG, two per row. Only PNGs that actually exist are referenced,
    so a partially populated folder never produces an ``\\includegraphics``
    pointing at a missing file. With all four PNGs present the result is the
    familiar 2x2 layout. Nothing is written when no PNG exists.

    Returns None.
    """
    outdir = Path(ROOT) / cif_stem / "combined"
    texfile = outdir / "figure_snippet.tex"

    # (PNG stem, caption) in display order.
    layout = (
        ("combo_H2O_KIE", r"H$_2$O KIE: H$_2$O/D$_2$O/T$_2$O"),
        ("combo_H2S_vs_H2O", r"H$_2$S vs H$_2$O"),
        ("combo_H2S_KIE", r"H$_2$S KIE: H$_2$S/D$_2$S/T$_2$S"),
        ("combo_H2O_vs_T2S", r"H$_2$O vs T$_2$S (max contrast)"),
    )
    panels = [(outdir / f"{stem}.png", caption)
              for stem, caption in layout
              if (outdir / f"{stem}.png").exists()]
    if not panels:
        return

    lines = [
        "",
        f"% Auto-generated for {cif_stem}",
        r"\noindent",
        r"\begin{minipage}{\linewidth}\centering",
    ]
    last = len(panels) - 1
    for idx, (png, caption) in enumerate(panels):
        # Left column ends with \hfill, right column with a row break;
        # the final panel gets neither.
        if idx == last:
            closer = ""
        elif idx % 2 == 0:
            closer = r"\hfill"
        else:
            closer = r"\\[0.5em]"
        # LaTeX wants forward slashes even for Windows-style paths.
        png_path = str(png).replace("\\", "/")
        lines += [
            r"  \begin{minipage}{0.49\linewidth}\centering",
            r"    \includegraphics[width=\linewidth]{" + png_path + r"}\\[-0.25em]",
            r"    \small " + caption,
            r"  \end{minipage}" + closer,
        ]
    lines += [r"\end{minipage}", ""]

    outdir.mkdir(parents=True, exist_ok=True)
    with open(texfile, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    print(f"[{cif_stem}] wrote LaTeX snippet ‚Üí {texfile}")

# --------------------------------------------------------------
# Main entry
# --------------------------------------------------------------

def main():
    """Render every group in GROUPS for every result folder found under ROOT.

    A LaTeX snippet is written for a folder only when at least one of its
    group figures was actually produced.
    """
    cif_stems = find_cif_stems(ROOT)
    print("CIFs:", cif_stems)
    for stem in cif_stems:
        # Build the full list first so that every group is attempted.
        produced = [plot_group(stem, name, conds) for name, conds in GROUPS.items()]
        if any(produced):
            write_latex_snippet(stem)

if __name__ == "__main__":
    main()

CIFs: ['6DHE', '6W1U', '6W1V', '8EZ5', '8F4C', '8F4D', '8F4E', '8F4F', '8F4G', '8F4H', '8F4I', '8F4J', '8F4K']
[6DHE/H2O] missing OW totals ‚Üí skip
[6DHE] missing H2O for combo_H2O_KIE
[6DHE/D2O] missing OW totals ‚Üí skip
[6DHE] missing D2O for combo_H2O_KIE
[6DHE/T2O] missing OW totals ‚Üí skip
[6DHE] missing T2O for combo_H2O_KIE
[6DHE/H2O] missing OW totals ‚Üí skip
[6DHE] missing H2O for combo_H2S_vs_H2O
[6DHE/H2S] missing OW totals ‚Üí skip
[6DHE] missing H2S for combo_H2S_vs_H2O
[6DHE/H2S] missing OW totals ‚Üí skip
[6DHE] missing H2S for combo_H2S_KIE
[6DHE/D2S] missing OW totals ‚Üí skip
[6DHE] missing D2S for combo_H2S_KIE
[6DHE/T2S] missing OW totals ‚Üí skip
[6DHE] missing T2S for combo_H2S_KIE
[6DHE/H2O] missing OW totals ‚Üí skip
[6DHE] missing H2O for combo_H2O_vs_T2S
[6DHE/T2S] missing OW totals ‚Üí skip
[6DHE] missing T2S for combo_H2O_vs_T2S
[6W1U/H2O] missing OW totals ‚Üí skip
[6W1U] missing H2O for combo_H2O_KIE
[6W1U/D2O] missing OW totals ‚Üí skip
[6W1U] missing

In [None]:
# ==============================================================
# GQR_OUT multi-CIF comparison visualizer (robust header autodetect)
# ==============================================================

import os, re
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Base results folder. The functions in this cell build their paths as
# ROOT/<cif_stem>/<condition>/populations_<condition>.csv and
# ROOT/<cif_stem>/combined/...; being a relative path, it is resolved
# against the current working directory.
ROOT = "GQR_OUT"   # base results folder

# Per-condition line style used by plot_group:
#   "label" -> legend text, "ls" -> matplotlib linestyle, "lw" -> line width.
# The O and S series deliberately reuse the same three styles.
PALETTES = {
    "H2O": {"label":"H$_2$O", "ls":"-",  "lw":2.2},
    "D2O": {"label":"D$_2$O", "ls":"--", "lw":2.0},
    "T2O": {"label":"T$_2$O", "ls":"-.", "lw":2.0},
    "H2S": {"label":"H$_2$S", "ls":"-",  "lw":2.2},
    "D2S": {"label":"D$_2$S", "ls":"--", "lw":2.0},
    "T2S": {"label":"T$_2$S", "ls":"-.", "lw":2.0},
}

# Figure name -> conditions overlaid in that figure. The keys double as the
# PNG file stems that write_latex_snippet looks for.
GROUPS = {
    "combo_H2O_KIE": ["H2O","D2O","T2O"],
    "combo_H2S_vs_H2O": ["H2O","H2S"],
    "combo_H2S_KIE": ["H2S","D2S","T2S"],
    "combo_H2O_vs_T2S": ["H2O","T2S"],
}

# Candidate time-column headers, tried in this order by detect_time_to_fs
# (exact match first, then case-insensitive).
TIME_GUESSES = ["time_fs","t_fs","time","t","time_ps","t_ps","time_as","t_as"]

def find_cif_stems(root):
    """List, in sorted order, the sub-folders of *root* that hold results.

    A sub-folder qualifies as soon as one of the six known conditions has its
    ``<cond>/populations_<cond>.csv`` file present.
    """
    known_conditions = ["H2O","D2O","T2O","H2S","D2S","T2S"]

    def _has_results(folder):
        # True when at least one condition CSV exists inside *folder*.
        return any(
            (folder/cond/f"populations_{cond}.csv").exists()
            for cond in known_conditions
        )

    candidates = (p for p in Path(root).glob("*") if p.is_dir())
    return sorted(p.name for p in candidates if _has_results(p))

def _norm(s: str) -> str:
    return re.sub(r'[^a-z0-9]+','_', s.lower())

def _first_present(cols, candidates):
    for c in candidates:
        if c in cols:
            return c
    # case-insensitive fallback
    low = {c.lower():c for c in cols}
    for c in candidates:
        if c.lower() in low:
            return low[c.lower()]
    return None

def detect_time_to_fs(df):
    """Find the time column of *df* and return it converted to femtoseconds.

    Lookup order:
      1. the headers listed in ``TIME_GUESSES`` (via ``_first_present``);
      2. any header whose normalised form contains ``t`` or ``time`` as a
         whole underscore-delimited token (e.g. ``"Time (fs)"``, ``"sim_time"``).

    Unit handling, based on the tokens of the normalised header:
      * ``ps`` -> multiplied by 1000;
      * ``as`` -> multiplied by 1e-3;
      * ``fs`` -> left untouched (the header is trusted);
      * no unit token -> guessed from the largest value: below 2 is treated
        as picoseconds, between 2e3 and 2e6 as attoseconds, anything else as
        femtoseconds already.

    Returns ``(column_name, series)``; ``(None, None)`` when no time column
    is found. Non-numeric cells become NaN.
    """
    cols = list(df.columns)
    tcol = _first_present(cols, TIME_GUESSES)
    if tcol is None:
        # `_norm` joins words with "_", which is a regex word character, so
        # `\b` cannot delimit tokens here; anchor on "_" / string ends instead.
        for c in cols:
            if re.search(r'(^|_)(t|time)(_|$)', _norm(c)):
                tcol = c
                break
    if tcol is None:
        return None, None

    series = pd.to_numeric(df[tcol], errors='coerce')

    # Whole-token unit test: a substring test would misfire on names such as
    # "elapsed" (contains "ps") or "phase" (contains "as").
    tokens = set(_norm(tcol).split("_"))
    if "ps" in tokens:
        series = series * 1000.0
    elif "as" in tokens:
        series = series * 1e-3
    elif "fs" not in tokens:
        # No unit in the header: fall back to a magnitude guess.
        mx = series.max(skipna=True)
        if pd.notna(mx):
            if mx < 2.0:          # likely ps
                series = series * 1000.0
            elif 2e3 < mx < 2e6:  # likely as
                series = series * 1e-3
    return tcol, series

def _likely_total(coln):
    """Tell whether header *coln* looks like a water-oxygen *total* column.

    The normalised header must contain ``total`` and either ``ow`` or both
    of ``water`` and ``oxygen``.
    """
    key = _norm(coln)
    if "total" not in key:
        return False
    # "water_oxygen" / "water_oxygens" are special cases of this test.
    mentions_water_oxygen = "water" in key and "oxygen" in key
    return "ow" in key or mentions_water_oxygen

def _is_ow3(coln):
    """Tell whether header *coln* looks like the OW3 (water oxygen 3) column.

    Headers mentioning ``hydrogen`` or ``hw`` are always rejected. Otherwise
    the normalised header must contain ``ow3`` or match the looser
    "oxygen-ish prefix ... 3" pattern.
    """
    key = _norm(coln)
    if "hydrogen" in key or "hw" in key:
        return False
    if "ow3" in key:
        return True
    # NOTE(review): after `_norm` the separators are "_", a word character,
    # so the trailing `\b` effectively requires the 3 to end the name.
    return re.search(r'(ow|water(_|)oxygen|w)[^0-9]*3\b', key) is not None

def _is_ow4(coln):
    """Tell whether header *coln* looks like the OW4 (water oxygen 4) column.

    Headers mentioning ``hydrogen`` or ``hw`` are always rejected. Otherwise
    the normalised header must contain ``ow4`` or match the looser
    "oxygen-ish prefix ... 4" pattern.
    """
    key = _norm(coln)
    names_a_hydrogen = ("hydrogen" in key) or ("hw" in key)
    if names_a_hydrogen:
        return False
    # NOTE(review): after `_norm` the separators are "_", a word character,
    # so the trailing `\b` effectively requires the 4 to end the name.
    loose_hit = re.search(r'(ow|water(_|)oxygen|w)[^0-9]*4\b', key)
    return ("ow4" in key) or (loose_hit is not None)

def _percent_to_fraction(series: pd.Series) -> pd.Series:
    # if it looks like % (max>1 and <=100), convert to 0..1
    mx = series.max(skipna=True)
    if pd.notna(mx) and mx > 1.5 and mx <= 100.0:
        return series / 100.0
    return series

def detect_water_series(df):
    """Heuristically pick the water-oxygen population columns out of *df*.

    Steps:
      1. An explicit total column is the first header accepted by
         ``_likely_total``.
      2. OW3 / OW4 are the first headers accepted by ``_is_ow3`` / ``_is_ow4``.
      3. If either is still missing, oxygen-like headers (containing ``ow`` or
         ``water_oxygen``, and neither ``hydrogen`` nor ``hw``) are scanned
         for a stand-alone ``3`` / ``4`` token; the last such header fills the
         missing slot.
      4. When no explicit total exists but both components do, the total is
         their sum with NaN treated as 0.

    Every selected column is coerced to numeric (bad cells become NaN) and
    passed through ``_percent_to_fraction``.

    Returns a dict with the series ``P_total``, ``P_OW3``, ``P_OW4`` and the
    source headers ``total_name``, ``ow3_name``, ``ow4_name``. Any entry may
    be ``None``. The ``*_name`` entries name the column that was actually
    used, including one picked up by the step-3 fallback; ``total_name`` stays
    ``None`` when the total was synthesised in step 4.
    """
    cols = list(df.columns)

    def _as_fraction(col):
        # Numeric coercion followed by the percent -> fraction heuristic.
        return _percent_to_fraction(pd.to_numeric(df[col], errors='coerce'))

    # 1. explicit total (first match wins)
    total_candidates = [c for c in cols if _likely_total(c)]
    total_name = total_candidates[0] if total_candidates else None
    P_total = _as_fraction(total_name) if total_candidates else None

    # 2. explicit OW3 / OW4 (first match wins)
    ow3_list = [c for c in cols if _is_ow3(c)]
    ow4_list = [c for c in cols if _is_ow4(c)]
    ow3_name = ow3_list[0] if ow3_list else None
    ow4_name = ow4_list[0] if ow4_list else None
    P_OW3 = _as_fraction(ow3_name) if ow3_list else None
    P_OW4 = _as_fraction(ow4_name) if ow4_list else None

    # 3. fallback: oxygen-like headers carrying a stand-alone 3 / 4 token
    if P_OW3 is None or P_OW4 is None:
        c3 = None
        c4 = None
        for c in cols:
            s = _norm(c)
            oxygen_like = ("ow" in s) or ("water_oxygen" in s)
            if not oxygen_like or ("hydrogen" in s) or ("hw" in s):
                continue
            if re.search(r'(^|_)3($|_)', s):
                c3 = c
            if re.search(r'(^|_)4($|_)', s):
                c4 = c
        if c3 is not None and P_OW3 is None:
            P_OW3 = _as_fraction(c3)
            ow3_name = c3   # report the column that was really used
        if c4 is not None and P_OW4 is None:
            P_OW4 = _as_fraction(c4)
            ow4_name = c4

    # 4. synthesize total if missing
    if P_total is None and (P_OW3 is not None) and (P_OW4 is not None):
        P_total = P_OW3.fillna(0) + P_OW4.fillna(0)

    return {
        "P_total": P_total,
        "P_OW3": P_OW3,
        "P_OW4": P_OW4,
        "ow3_name": ow3_name,
        "ow4_name": ow4_name,
        "total_name": total_name,
    }

def load_condition_csv(cif_stem, cond):
    """Load ``ROOT/<cif_stem>/<cond>/populations_<cond>.csv`` into a fixed schema.

    Column roles are auto-detected with ``detect_time_to_fs`` and
    ``detect_water_series``; the chosen mapping is printed for each file.

    Returns a DataFrame with ``time_fs``, ``P_OW_total``, ``P_OW3`` and
    ``P_OW4`` (the last two hold ``pd.NA`` when not detected), or ``None``
    when the file is missing, has no recognisable time column, or yields no
    water-oxygen total.
    """
    source = Path(ROOT) / cif_stem / cond / f"populations_{cond}.csv"
    if not source.exists():
        return None

    raw = pd.read_csv(source)

    time_name, t_fs = detect_time_to_fs(raw)
    if time_name is None:
        print(f"[{cif_stem}/{cond}] ‚ö†Ô∏è no time column; columns={list(raw.columns)[:6]}...")
        return None

    water = detect_water_series(raw)
    total = water["P_total"]
    ow3, ow4 = water["P_OW3"], water["P_OW4"]
    total_name = water["total_name"]
    ow3_name, ow4_name = water["ow3_name"], water["ow4_name"]

    if total is None:
        print(f"[{cif_stem}/{cond}] missing OW totals ‚Üí tried OW3/OW4={ow3_name},{ow4_name} | total={total_name}")
        return None

    # Debug mapping (printed on every load of the file)
    print(f"[{cif_stem}/{cond}] ‚Æï time='{time_name}' | total='{total_name}' | OW3='{ow3_name}' | OW4='{ow4_name}'")

    result = pd.DataFrame({"time_fs": t_fs, "P_OW_total": total})
    for column, series in (("P_OW3", ow3), ("P_OW4", ow4)):
        result[column] = pd.NA if series is None else series
    return result

def plot_group(cif_stem, group_name, cond_list):
    """Draw one comparison figure for *cif_stem* and save it as a PNG.

    Each loadable condition in *cond_list* contributes its total
    water-oxygen population (styled via ``PALETTES``) plus, when present,
    faint dotted OW3/OW4 traces. Conditions that fail to load are skipped
    silently.

    Returns the saved path ``ROOT/<cif_stem>/combined/<group_name>.png``, or
    ``None`` if not a single condition could be plotted.
    """
    combined = Path(ROOT) / cif_stem / "combined"
    combined.mkdir(parents=True, exist_ok=True)
    png_path = combined / f"{group_name}.png"

    plt.figure(figsize=(7.6,5.2))
    plotted = []

    for cond in cond_list:
        frame = load_condition_csv(cif_stem, cond)
        if frame is None:
            continue

        look = PALETTES.get(cond, {"label":cond, "ls":"-", "lw":2.0})
        times = frame["time_fs"]
        # Scaled by 100 to match the percent axis label.
        plt.plot(times, frame["P_OW_total"]*100.0,
                 linestyle=look["ls"], linewidth=look["lw"], label=look["label"])
        for column in ("P_OW3", "P_OW4"):
            component = frame[column]
            if component.notna().any():
                plt.plot(times, component*100.0, ":", lw=1.1, alpha=0.45)
        plotted.append(cond)

    if not plotted:
        # Empty figure: drop it instead of saving a blank PNG.
        plt.close()
        return None

    readable = group_name.replace('_',' ')
    plt.title(f"{cif_stem}: {readable}")
    plt.xlabel("Time (fs)")
    plt.ylabel("Population on water oxygens (%)")
    plt.grid(True, linestyle="--", alpha=0.35)
    plt.legend(title="Condition", ncol=3, frameon=False)
    plt.tight_layout()
    plt.savefig(png_path, dpi=300)
    plt.close()
    return png_path

def write_latex_snippet(cif_stem):
    """Write a two-column LaTeX panel snippet under ROOT/<cif_stem>/combined.

    The snippet (``figure_snippet.tex``) contains one captioned minipage per
    combined PNG, two per row. Only PNGs that actually exist are referenced,
    so a partially populated folder never produces an ``\\includegraphics``
    pointing at a missing file. With all four PNGs present the result is the
    2x2 layout. Nothing is written when no PNG exists.

    Every ``\\begin{minipage}`` is closed by a matching ``\\end{minipage}``.

    Returns None.
    """
    outdir = Path(ROOT) / cif_stem / "combined"
    texfile = outdir / "figure_snippet.tex"

    # (PNG stem, caption) in display order.
    layout = (
        ("combo_H2O_KIE", r"H$_2$O KIE: H$_2$O/D$_2$O/T$_2$O"),
        ("combo_H2S_vs_H2O", r"H$_2$S vs H$_2$O"),
        ("combo_H2S_KIE", r"H$_2$S KIE: H$_2$S/D$_2$S/T$_2$S"),
        ("combo_H2O_vs_T2S", r"H$_2$O vs T$_2$S (max contrast)"),
    )
    panels = [(outdir / f"{stem}.png", caption)
              for stem, caption in layout
              if (outdir / f"{stem}.png").exists()]
    if not panels:
        return

    lines = [
        "",
        f"% Auto-generated for {cif_stem}",
        r"\noindent",
        r"\begin{minipage}{\linewidth}\centering",
    ]
    last = len(panels) - 1
    for idx, (png, caption) in enumerate(panels):
        # Left column ends with \hfill, right column with a row break;
        # the final panel gets neither.
        if idx == last:
            closer = ""
        elif idx % 2 == 0:
            closer = r"\hfill"
        else:
            closer = r"\\[0.5em]"
        # LaTeX wants forward slashes even for Windows-style paths.
        png_path = str(png).replace("\\", "/")
        lines += [
            r"  \begin{minipage}{0.49\linewidth}\centering",
            r"    \includegraphics[width=\linewidth]{" + png_path + r"}\\[-0.25em]",
            r"    \small " + caption,
            r"  \end{minipage}" + closer,
        ]
    lines += [r"\end{minipage}", ""]

    outdir.mkdir(parents=True, exist_ok=True)
    with open(texfile, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    print(f"[{cif_stem}] wrote LaTeX snippet ‚Üí {texfile}")

def main():
    """Plot all GROUPS for each result folder under ROOT, then emit its LaTeX.

    The LaTeX snippet is only written for folders where at least one group
    figure was produced.
    """
    cif_stems = find_cif_stems(ROOT)
    print("CIFs:", cif_stems)
    for stem in cif_stems:
        n_figures = 0
        for group_name, conds in GROUPS.items():
            if plot_group(stem, group_name, conds):
                n_figures += 1
        if n_figures:
            write_latex_snippet(stem)

if __name__ == "__main__":
    main()

CIFs: ['6DHE', '6W1U', '6W1V', '8EZ5', '8F4C', '8F4D', '8F4E', '8F4F', '8F4G', '8F4H', '8F4I', '8F4J', '8F4K']
[6DHE/H2O] ‚Æï time='time_fs' | total='None' | OW3='pop_OW3' | OW4='pop_OW4'
[6DHE/D2O] ‚Æï time='time_fs' | total='None' | OW3='pop_OW3' | OW4='pop_OW4'
[6DHE/T2O] ‚Æï time='time_fs' | total='None' | OW3='pop_OW3' | OW4='pop_OW4'
[6DHE/H2O] ‚Æï time='time_fs' | total='None' | OW3='pop_OW3' | OW4='pop_OW4'
[6DHE/H2S] ‚Æï time='time_fs' | total='None' | OW3='pop_OW3' | OW4='pop_OW4'
[6DHE/H2S] ‚Æï time='time_fs' | total='None' | OW3='pop_OW3' | OW4='pop_OW4'
[6DHE/D2S] ‚Æï time='time_fs' | total='None' | OW3='pop_OW3' | OW4='pop_OW4'
[6DHE/T2S] ‚Æï time='time_fs' | total='None' | OW3='pop_OW3' | OW4='pop_OW4'
[6DHE/H2O] ‚Æï time='time_fs' | total='None' | OW3='pop_OW3' | OW4='pop_OW4'
[6DHE/T2S] ‚Æï time='time_fs' | total='None' | OW3='pop_OW3' | OW4='pop_OW4'
[6DHE] wrote LaTeX snippet ‚Üí GQR_OUT/6DHE/combined/figure_snippet.tex
[6W1U/H2O] ‚Æï time='time_fs' | total='None' | 