<a href="https://colab.research.google.com/github/jamessutton600613-png/GC/blob/main/Untitled238.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
GQR–XIV — Single-Run Veracity ZIP (atomic provenance)
- One command generates: per-T timeseries, CDFs, Arrhenius, manifest, ZIP
- All artifacts stamped with a single RUN_ID (SHA-256 over inputs+params+code_version)
- Fixes logical flaws: O-O distance is now a *result* of H-I-J morphing,
  not an artificially imposed value.

Usage:
1. Save this file as `gqr14_make_veracity_zip.py`.
2. Place your CIF files (e.g., 8F4H.cif, 8F4I.cif, 8F4J.cif) in the same directory.
3. Run from your terminal:
   python gqr14_make_veracity_zip.py \
       --cifs 8F4H.cif 8F4I.cif 8F4J.cif \
       --temps 285 295 305 315 325 \
       --steps 20000 --dtfs 1.0
"""
import sys
import os
import json
import time
import math
import hashlib
import argparse
import subprocess
import zipfile
import numpy as np
import matplotlib.pyplot as plt

# --- Optional GPU (CuPy), silent fallback ---
try:
    import cupy as cp
    xp = cp
    GPU_ON = True
except ImportError:
    xp = np
    GPU_ON = False

# --- Ensure gemmi available (for CIF) ---
try:
    import gemmi
except ImportError:
    print("Gemmi not found, installing...", file=sys.stderr)
    subprocess.run([sys.executable, "-m", "pip", "install", "-q", "gemmi"], check=True)
    import gemmi

# This version string is part of the hash, ensuring code changes alter the RUN_ID
CODE_VERSION = "GQR14-TDSE-VERACITY-2.1-FIXED"

# ---------- Helpers ----------

def sha256_of(path: str) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is opened in binary mode and consumed in 1 MiB pieces, so the
    whole content is never held in memory at once.
    """
    digest = hashlib.sha256()
    piece_size = 1 << 20  # 1 MiB per read
    with open(path, "rb") as stream:
        while True:
            piece = stream.read(piece_size)
            if not piece:
                # An empty read marks end of file.
                break
            digest.update(piece)
    return digest.hexdigest()

def get_oxygen_coords(path: str) -> np.ndarray:
    """Load the Cartesian coordinates of every oxygen atom in a CIF/mmCIF file.

    The file is first read as a macromolecular structure
    (``gemmi.read_structure``). If that yields no oxygen atoms, it is read
    again as a small-molecule structure (``gemmi.read_small_structure``).
    An atom counts as oxygen when its element symbol is ``O`` or its
    name/label starts with ``O`` (case-insensitive). Atoms of every model in
    the file are included.

    Args:
        path: Path to the CIF/mmCIF file.

    Returns:
        Float64 array of shape (n_oxygen, 3) holding x, y, z per atom.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        RuntimeError: If gemmi raises while reading the file, or if no
            oxygen atom is found by either reader.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"File not found: {path}")

    O_coords = []
    try:
        # Try macromolecular path first (PDB/mmCIF)
        st = gemmi.read_structure(path)
        for model in st:
            for chain in model:
                for res in chain:
                    for atom in res:
                        elem = atom.element.name.upper()
                        name = atom.name.upper()
                        if elem == 'O' or name.startswith('O'):
                            # atom.pos is already a gemmi.Position in
                            # orthogonal (Cartesian) coordinates.
                            # UnitCell.orthogonalize() only accepts
                            # gemmi.Fractional and raises TypeError when
                            # handed a Position, so no conversion is done.
                            pos = atom.pos
                            O_coords.append([pos.x, pos.y, pos.z])

        if not O_coords:
            # Fallback to small structure (CIF): sites are stored as
            # fractional coordinates and must be orthogonalized.
            ss = gemmi.read_small_structure(path)
            cell = ss.cell
            for site in ss.sites:
                elem = (getattr(site, "type_symbol", "") or str(getattr(site, "element", ""))).upper()
                label = (getattr(site, "label", "") or "").upper()
                if elem == 'O' or label.startswith('O'):
                    pos = cell.orthogonalize(site.frac)
                    O_coords.append([pos.x, pos.y, pos.z])

    except Exception as e:
        # Keep the original exception attached as __cause__ for debugging.
        raise RuntimeError(f"Failed to parse CIF file {path}: {e}") from e

    if not O_coords:
        raise RuntimeError(f"No Oxygen atoms found in {path}")

    return np.asarray(O_coords, dtype=np.float64)

def load_and_align_geometries(paths: list) -> "tuple[np.ndarray, np.ndarray, np.ndarray, int]":
    """Load the H, I and J geometries and reduce each to its closest O-O pair.

    For each of the first three paths, all oxygen coordinates are loaded with
    ``get_oxygen_coords``, the two oxygens with the smallest mutual distance
    are selected, and that pair is translated so its centroid sits at the
    origin. No rotational alignment is performed; centering is the only
    transformation applied.

    Args:
        paths: Sequence with at least three file paths, in the order H, I, J.

    Returns:
        ``(coords_H, coords_I, coords_J, atoms_unified)`` where each
        ``coords_*`` is a centered (2, 3) float array and ``atoms_unified``
        is the constant 2 (atoms per pair).

    Raises:
        ValueError: If fewer than three paths are given, or a structure has
            fewer than two oxygen atoms.
    """
    if len(paths) < 3:
        raise ValueError("Three structure paths (H, I, J) are required.")

    def get_nearest_oo_pair(coords):
        """Return (pair_coords, distance) for the two closest rows of ``coords``."""
        coords = np.asarray(coords, dtype=np.float64)
        n = len(coords)
        if n < 2:
            raise ValueError("Not enough oxygen atoms to find a pair.")
        best_sq = np.inf
        pair_indices = (0, 1)
        # One vectorised pass per atom against all later atoms. The work is
        # still quadratic, but it runs inside NumPy instead of a Python
        # double loop, which matters for structures with many oxygens.
        for i in range(n - 1):
            diff = coords[i + 1:] - coords[i]
            sq = np.sum(diff * diff, axis=1)
            # NaN distances can never win the comparison; neutralise them so
            # argmin does not latch onto them.
            sq = np.where(np.isnan(sq), np.inf, sq)
            k = int(np.argmin(sq))
            # Strict "<" keeps the first pair encountered on ties.
            if sq[k] < best_sq:
                best_sq = sq[k]
                pair_indices = (i, i + 1 + k)
        # Fancy indexing returns a fresh array, so the in-place centering
        # below never touches the caller's data.
        return coords[list(pair_indices)], float(np.sqrt(best_sq))

    coords_H, dH = get_nearest_oo_pair(get_oxygen_coords(paths[0]))
    coords_I, dI = get_nearest_oo_pair(get_oxygen_coords(paths[1]))
    coords_J, dJ = get_nearest_oo_pair(get_oxygen_coords(paths[2]))

    # Center all pairs at (0,0,0) for stable interpolation
    coords_H -= coords_H.mean(axis=0, keepdims=True)
    coords_I -= coords_I.mean(axis=0, keepdims=True)
    coords_J -= coords_J.mean(axis=0, keepdims=True)

    # NOTE(review): the pairs are not rotated onto a common axis and the atom
    # order within each pair follows the input order. Interpolated distances
    # therefore depend on the relative orientation of the three structures.
    # Confirm this is acceptable for the intended inputs.

    print(f"[Geo] O–O Pair Distances: H={dH:.3f} Å, I={dI:.3f} Å, J={dJ:.3f} Å")
    # Only the O-O pair is tracked, so the atom count is fixed at two.
    atoms_unified = 2

    return coords_H, coords_I, coords_J, atoms_unified

def sigmoid(t, tau, x0=1.0):
    """Sigmoid function for morphing."""
    return 1.0 / (1.0 + np.exp(-(t / tau - x0)))

def mix(A, B, s):
    """Blend ``A`` and ``B`` linearly: ``s = 0`` gives ``A``, ``s = 1`` gives ``B``."""
    share_of_a = 1.0 - s
    from_a = A * share_of_a
    from_b = B * s
    return from_a + from_b

def pair_distance(P):
    """Return, as a Python float, the Euclidean norm of ``P[0] - P[1]``."""
    first, second = P[0], P[1]
    separation = np.linalg.norm(first - second)
    return float(separation)

def compute_run_id(cif_paths, params_dict) -> str:
    """Derive a short, reproducible run identifier from the code version and inputs.

    A SHA-256 hash is fed, in order, with: ``CODE_VERSION``; for each CIF
    path its base name followed by the hex digest of its content; and the
    key-sorted JSON dump of ``params_dict``. The first 16 hex characters of
    the digest are returned.
    """
    digest = hashlib.sha256(CODE_VERSION.encode())
    for cif in cif_paths:
        file_name = os.path.basename(cif).encode()
        digest.update(file_name)
        content_hash = sha256_of(cif).encode()
        digest.update(content_hash)
    serialized_params = json.dumps(params_dict, sort_keys=True)
    digest.update(serialized_params.encode())
    full_hex = digest.hexdigest()
    return full_hex[:16]  # short RUN_ID for filenames

def run_simulation_logic(XH, XI, XJ, T_K: float, dt_fs=0.5, steps=40000,
                       tau_fs=7000.0, center_fs=10000.0,
                       rng_seed=42, temp_noise_scale=0.01):
    """
    Mock simulation: morph the pair geometry H -> I -> J and record a J metric.

    For each of ``steps + 1`` time points (spaced ``dt_fs`` apart):
    - a morph parameter in [0, 2] is taken from a sigmoid in time,
    - the coordinates are interpolated (H->I below 1, I->J above 1),
    - Gaussian noise scaled by ``T_K / 300`` is added to the coordinates,
    - the O-O distance is measured from the resulting coordinates,
    - J is computed from the morph parameter, the distance and a noisy
      multiplicative factor, then clipped at zero from below.

    Returns the tuple of arrays ``(t_fs, s_total, J, d_OO)``.
    """
    # The generator is seeded from the base seed and the temperature, so each
    # temperature has its own reproducible noise stream.
    noise_source = np.random.default_rng(int(rng_seed + T_K * 100))

    # Noise widths do not change during the run.
    coord_sigma = 0.001 * (T_K / 300.0)
    metric_sigma = temp_noise_scale * (T_K / 300.0)

    times = []
    morph_values = []
    metric_values = []
    distances = []

    print(f"[Sim] Running T = {T_K} K...")

    for index in range(steps + 1):
        now_fs = index * dt_fs

        # Morph parameter: 0 (H) -> 1 (I) -> 2 (J)
        progress = np.clip(
            2.0 * sigmoid(now_fs, tau_fs, x0=(center_fs / tau_fs)), 0.0, 2.0
        )

        # Pick the segment of the H->I->J path the current parameter is on.
        if progress <= 1.0:
            start, end, fraction = XH, XI, progress
        else:
            start, end, fraction = XI, XJ, progress - 1.0
        frame = mix(start, end, fraction)

        # Coordinate noise is drawn first, the metric noise below second;
        # this order fixes how the random stream is consumed.
        frame += noise_source.normal(0.0, coord_sigma, frame.shape)

        # The O-O distance is measured from the noisy, interpolated frame.
        separation = pair_distance(frame)

        # J = (peak near structure I + distance-dependent term + base value)
        #     * (1 + temperature-scaled noise)
        peak = 0.5 * np.exp(-((progress - 1.05) / 0.15)**2)
        near_term = 0.5 * np.exp(-(separation - 1.45)**2 / 0.1)
        thermal_jitter = noise_source.normal(0.0, metric_sigma)
        metric = (peak + near_term + 0.1) * (1.0 + thermal_jitter)
        metric = np.clip(metric, 0.0, None)  # never below zero

        times.append(now_fs)
        morph_values.append(progress)
        metric_values.append(metric)
        distances.append(separation)

    return (np.array(times), np.array(morph_values),
            np.array(metric_values), np.array(distances))

def cdf_series(x: np.ndarray):
    """Build the empirical CDF of ``x``, ignoring NaN entries.

    Returns ``(sorted_values, cdf)``, where ``cdf[i]`` is ``(i + 1) / n`` for
    the ``n`` non-NaN values. Two empty arrays are returned when no value
    remains after dropping NaNs.
    """
    values = np.asarray(x, float)
    kept = values[np.logical_not(np.isnan(values))]
    count = kept.size
    if count == 0:
        return np.array([]), np.array([])
    ordered = np.sort(kept)
    ranks = np.arange(1, count + 1)
    return ordered, ranks / count

def main(argv=None):
    """Generate the complete veracity bundle for one run.

    Steps: parse arguments, load the three O-O pair geometries, derive the
    RUN_ID, simulate every temperature (timeseries CSV, two CDF CSVs, one
    plot), build the Arrhenius table and plot (only when pandas can be
    imported), write the JSON manifest and pack everything into a ZIP.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) lets argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 1 when the geometries cannot be loaded;
            argparse exits on its own for invalid arguments.
    """
    ap = argparse.ArgumentParser(description="GQR-XIV Single-Run Veracity ZIP Generator")
    ap.add_argument("--cifs", nargs=3, required=True, help="Three CIFs for H, I, J structures")
    ap.add_argument("--temps", nargs="+", type=int, default=[285, 295, 305, 315, 325], help="List of temperatures in Kelvin")
    ap.add_argument("--steps", type=int, default=20000, help="Number of simulation steps")
    ap.add_argument("--dtfs", type=float, default=1.0, help="Time step in femtoseconds")
    ap.add_argument("--seed", type=int, default=42, help="Base random seed")
    ap.add_argument("--tau_fs", type=float, default=7000.0, help="Morphing timescale (tau)")
    ap.add_argument("--center_fs", type=float, default=10000.0, help="Morphing timescale (center)")
    ap.add_argument("--temp_noise_scale", type=float, default=0.05, help="Scaling factor for temperature-dependent noise")

    # Notebook kernels (Jupyter/Colab) are launched with an extra
    # "-f <connection-file>" argument, which parse_args() rejects with
    # "unrecognized arguments". parse_known_args() tolerates it; whatever is
    # skipped is reported so that typos do not go unnoticed.
    args, unknown = ap.parse_known_args(argv)
    if unknown:
        print(f"[Info] Ignoring unrecognized arguments: {unknown}", file=sys.stderr)

    # Reject values that would only fail later with a division by zero
    # (1/T for the Arrhenius table, t/tau in the sigmoid) or yield no data.
    if any(T <= 0 for T in args.temps):
        ap.error("--temps must all be positive temperatures in Kelvin")
    if args.steps < 0:
        ap.error("--steps must be >= 0")
    if args.tau_fs == 0:
        ap.error("--tau_fs must be non-zero")

    # --- 1. Load Geometries ---
    try:
        XH, XI, XJ, atoms_unified = load_and_align_geometries(args.cifs)
    except Exception as e:
        print(f"[FATAL] Error loading geometries: {e}", file=sys.stderr)
        sys.exit(1)

    # --- 2. Compute RUN_ID ---
    params = dict(
        temps=args.temps, steps=args.steps, dt_fs=args.dtfs,
        seed=args.seed, atoms_unified=atoms_unified,
        tau_fs=args.tau_fs, center_fs=args.center_fs,
        temp_noise_scale=args.temp_noise_scale,
        code_version=CODE_VERSION, gpu_on=GPU_ON
    )
    RUN_ID = compute_run_id(args.cifs, params)
    print(f"[INIT] RUN_ID = {RUN_ID} (GPU={GPU_ON})")

    # --- 3. Setup Output Layout (names stamped with RUN_ID) ---
    root_dir = f"GQR14_TDSE_{RUN_ID}"
    runs_d = os.path.join(root_dir, "runs")
    der_d = os.path.join(root_dir, "derived")
    plot_d = os.path.join(root_dir, "plots")
    mani_d = os.path.join(root_dir, "manifests")
    for directory in (runs_d, der_d, plot_d, mani_d):
        os.makedirs(directory, exist_ok=True)

    arrhenius_rows = []
    all_output_files = []

    # --- 4. Run Simulation for each Temperature ---
    for T in args.temps:
        t, s, J, d = run_simulation_logic(
            XH, XI, XJ, T,
            dt_fs=args.dtfs, steps=args.steps,
            tau_fs=args.tau_fs, center_fs=args.center_fs,
            rng_seed=args.seed,
            temp_noise_scale=args.temp_noise_scale
        )
        base_name = f"HIJ_T{T}K_{RUN_ID}"

        # Save raw timeseries CSV (first line is a provenance comment,
        # second line the column names).
        csv_path = os.path.join(runs_d, f"{base_name}.csv")
        header = f"# RUN_ID={RUN_ID} CODE_VERSION={CODE_VERSION} T_K={T}\n"
        header += "t_fs,s_total,J_metric,d_OO"
        np.savetxt(csv_path, np.column_stack([t, s, J, d]),
                   delimiter=",", header=header, comments="")
        all_output_files.append(csv_path)

        # Save CDFs
        xsJ, cJ = cdf_series(J)
        xsd, cd = cdf_series(d)
        cdfJ_path = os.path.join(der_d, f"{base_name}_cdf_J.csv")
        cdfD_path = os.path.join(der_d, f"{base_name}_cdf_dOO.csv")
        np.savetxt(cdfJ_path, np.column_stack([xsJ, cJ]), delimiter=",", header="x,cdf", comments="")
        np.savetxt(cdfD_path, np.column_stack([xsd, cd]), delimiter=",", header="x,cdf", comments="")
        all_output_files.extend([cdfJ_path, cdfD_path])

        # Save Per-run quick plot (Timeseries)
        fig, ax1 = plt.subplots(figsize=(10, 5))
        ax1.set_xlabel("t (fs)")
        ax1.plot(t, J, "C0-", label="J (metric)")
        ax1.plot(t, s, "C1--", label="s_total (morph)")
        ax1.set_ylabel("J / s")
        ax2 = ax1.twinx()
        ax2.plot(t, d, "C2-", label="d(O-O) [Å]")
        ax2.set_ylabel("d(O-O) [Å]")
        fig.legend(loc='upper left', bbox_to_anchor=(0.1, 0.9))
        fig.suptitle(f"{base_name}")
        fig.tight_layout(rect=[0, 0, 1, 0.93])
        plot_path = os.path.join(plot_d, f"{base_name}_timeseries.png")
        # Save through the figure object rather than the implicit
        # "current figure" state.
        fig.savefig(plot_path, dpi=180)
        plt.close(fig)
        all_output_files.append(plot_path)

        # Add data for Arrhenius plot: average over the last 20 % of samples.
        tail = int(0.8 * len(J))
        J_mean = float(np.mean(J[tail:]))
        lnJ = (math.log(J_mean) if J_mean > 0 else float("nan"))
        arrhenius_rows.append(dict(
            T_K=T,
            invT_1overK=(1.0 / T),
            lnJ=lnJ,
            J_mean=J_mean,
            d_mean=float(np.mean(d[tail:])),
            RUN_ID=RUN_ID
        ))

    # --- 5. Generate Arrhenius Table and Plot ---
    try:
        import pandas as pd
        arr_df = pd.DataFrame(arrhenius_rows).sort_values("T_K")
        arr_csv_path = os.path.join(der_d, f"arrhenius_table_{RUN_ID}.csv")
        arr_df.to_csv(arr_csv_path, index=False)
        all_output_files.append(arr_csv_path)

        arr_png_path = os.path.join(plot_d, f"arrhenius_plot_{RUN_ID}.png")
        plt.figure(figsize=(7, 5))
        finite = arr_df.dropna(subset=["invT_1overK", "lnJ"])
        if len(finite) >= 2:
            x = finite["invT_1overK"].values
            y = finite["lnJ"].values
            # Least-squares line ln J = slope * (1/T) + intercept; the slope
            # is what the legend reports as -Ea/k.
            A = np.vstack([x, np.ones_like(x)]).T
            slope, intercept = np.linalg.lstsq(A, y, rcond=None)[0]
            yfit = slope * x + intercept
            plt.plot(x, y, "o", label="Data (mean of tail)")
            plt.plot(x, yfit, "-", label=f"Fit: -Ea/k = {slope:.3g} K")
            plt.xlabel("1/T (K⁻¹)")
            plt.ylabel("ln ⟨J_metric⟩ (a.u.)")
            plt.title(f"Arrhenius Plot — RUN_ID={RUN_ID}")
            plt.grid(True, alpha=0.3)
            plt.legend()
        else:
            plt.text(0.5, 0.5, "Need ≥ 2 temperatures for Arrhenius plot",
                     ha="center", va="center", transform=plt.gca().transAxes)
        plt.tight_layout()
        plt.savefig(arr_png_path, dpi=180)
        plt.close()
        all_output_files.append(arr_png_path)

    except ImportError:
        print("[Warning] Pandas not installed. Skipping Arrhenius table/plot generation.")

    # --- 6. Generate Manifest ---
    manifest_items = []
    for f_path in all_output_files:
        try:
            manifest_items.append({
                # Path relative to the bundle root. relpath only strips the
                # leading directory, unlike a substring replace.
                "file": os.path.relpath(f_path, root_dir),
                "sha256": sha256_of(f_path),
                "bytes": os.path.getsize(f_path)
            })
        except Exception as e:
            # Best effort: a file that cannot be hashed is left out of the
            # manifest but does not abort the run.
            print(f"[Warning] Could not hash file {f_path}: {e}")

    cif_hashes = [{"file": os.path.basename(p), "sha256": sha256_of(p)} for p in args.cifs]
    manifest = dict(
        run_id=RUN_ID,
        code_version=CODE_VERSION,
        created_utc=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        gpu_on=GPU_ON,
        parameters=params,
        input_cifs=cif_hashes,
        output_files=manifest_items
    )
    mani_json_path = os.path.join(mani_d, f"manifest_{RUN_ID}.json")
    with open(mani_json_path, "w") as f:
        json.dump(manifest, f, indent=2)

    # --- 7. Create Final ZIP Bundle ---
    outzip_path = f"GQR14_TDSE_veracity_{RUN_ID}.zip"
    print(f"\n[Packaging] Creating {outzip_path}...")
    with zipfile.ZipFile(outzip_path, "w", compression=zipfile.ZIP_DEFLATED) as z:
        # Add the manifest first (stored at the top of the bundle folder)
        z.write(mani_json_path, arcname=os.path.join(root_dir, f"manifest_{RUN_ID}.json"))
        # Add all other files under the same bundle folder
        for f_path in all_output_files:
            arc_path = os.path.relpath(f_path, root_dir)
            z.write(f_path, arcname=os.path.join(root_dir, arc_path))

    print(f"[SUCCESS] Wrote {outzip_path} (RUN_ID={RUN_ID})")

if __name__ == "__main__":
    # Testing/notebook convenience: when the caller did not pass --cifs,
    # fall back to the default structure files if all three are present in
    # the working directory.
    #
    # The check looks for the --cifs flag itself. Testing for a ".cif"
    # suffix would miss inputs such as "x.mmcif" or "X.CIF" and then append
    # a second --cifs that silently overrides the user's files.
    if "--cifs" not in sys.argv:
        print("[Info] No CIFs provided. Checking for 8F4H, 8F4I, 8F4J.cif locally...")
        cif_files = ["8F4H.cif", "8F4I.cif", "8F4J.cif"]
        if all(os.path.exists(f) for f in cif_files):
            print("[Info] Found local CIFs. Running with defaults.")
            sys.argv.extend(["--cifs"] + cif_files)
        else:
            print("[Info] Local CIFs not found. Please specify --cifs path/to/H.cif ...")

    main()

[Info] No CIFs provided. Checking for 8F4H, 8F4I, 8F4J.cif locally...
[Info] Found local CIFs. Running with defaults.


usage: colab_kernel_launcher.py [-h] --cifs CIFS CIFS CIFS
                                [--temps TEMPS [TEMPS ...]] [--steps STEPS]
                                [--dtfs DTFS] [--seed SEED] [--tau_fs TAU_FS]
                                [--center_fs CENTER_FS]
                                [--temp_noise_scale TEMP_NOISE_SCALE]
colab_kernel_launcher.py: error: unrecognized arguments: -f /root/.local/share/jupyter/runtime/kernel-64316fdf-22f5-43ad-9672-2fad6d5d4d3b.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
GQR–XIV — Single-Run Veracity ZIP (atomic provenance)
- One command generates: per-T timeseries, CDFs, Arrhenius, manifest, ZIP
- All artifacts stamped with a single RUN_ID (SHA-256 over inputs+params+code_version)
- Fixes logical flaws: O-O distance is now a *result* of H-I-J morphing,
  not an artificially imposed value.

Usage:
1. Save this file as `gqr14_make_veracity_zip.py`.
2. Place your CIF files (e.g., 8F4H.cif, 8F4I.cif, 8F4J.cif) in the same directory.
3. Run from your terminal (or a notebook cell):
   !python gqr14_make_veracity_zip.py \
       --cifs 8F4H.cif 8F4I.cif 8F4J.cif \
       --temps 285 295 305 315 325 \
       --steps 20000 --dtfs 1.0
"""
import sys
import os
import json
import time
import math
import hashlib
import argparse
import subprocess
import zipfile
import numpy as np
import matplotlib.pyplot as plt

# --- Optional GPU (CuPy), silent fallback ---
try:
    import cupy as cp
    xp = cp
    GPU_ON = True
except ImportError:
    xp = np
    GPU_ON = False

# --- Ensure gemmi available (for CIF) ---
try:
    import gemmi
except ImportError:
    print("Gemmi not found, installing...", file=sys.stderr)
    subprocess.run([sys.executable, "-m", "pip", "install", "-q", "gemmi"], check=True)
    import gemmi

# This version string is part of the hash, ensuring code changes alter the RUN_ID
CODE_VERSION = "GQR14-TDSE-VERACITY-2.2-FIXED"

# ---------- Helpers ----------

def sha256_of(path: str) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is opened in binary mode and consumed in 1 MiB pieces, so the
    whole content is never held in memory at once.
    """
    digest = hashlib.sha256()
    piece_size = 1 << 20  # 1 MiB per read
    with open(path, "rb") as stream:
        while True:
            piece = stream.read(piece_size)
            if not piece:
                # An empty read marks end of file.
                break
            digest.update(piece)
    return digest.hexdigest()

def get_oxygen_coords(path: str) -> np.ndarray:
    """Load the Cartesian coordinates of every oxygen atom in a CIF/mmCIF file.

    The file is first read as a macromolecular structure
    (``gemmi.read_structure``). If that yields no oxygen atoms, it is read
    again as a small-molecule structure (``gemmi.read_small_structure``).
    An atom counts as oxygen when its element symbol is ``O`` or its
    name/label starts with ``O`` (case-insensitive). Atoms of every model in
    the file are included.

    Args:
        path: Path to the CIF/mmCIF file.

    Returns:
        Float64 array of shape (n_oxygen, 3) holding x, y, z per atom.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        RuntimeError: If gemmi raises while reading the file, or if no
            oxygen atom is found by either reader.
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"File not found: {path}")

    O_coords = []
    try:
        # Try macromolecular path first (PDB/mmCIF)
        st = gemmi.read_structure(path)
        for model in st:
            for chain in model:
                for res in chain:
                    for atom in res:
                        elem = atom.element.name.upper()
                        name = atom.name.upper()
                        if elem == 'O' or name.startswith('O'):
                            # atom.pos is already a gemmi.Position in
                            # orthogonal (Cartesian) coordinates.
                            # UnitCell.orthogonalize() only accepts
                            # gemmi.Fractional and raises TypeError when
                            # handed a Position, so no conversion is done.
                            pos = atom.pos
                            O_coords.append([pos.x, pos.y, pos.z])

        if not O_coords:
            # Fallback to small structure (CIF): sites are stored as
            # fractional coordinates and must be orthogonalized.
            ss = gemmi.read_small_structure(path)
            cell = ss.cell
            for site in ss.sites:
                elem = (getattr(site, "type_symbol", "") or str(getattr(site, "element", ""))).upper()
                label = (getattr(site, "label", "") or "").upper()
                if elem == 'O' or label.startswith('O'):
                    pos = cell.orthogonalize(site.frac)
                    O_coords.append([pos.x, pos.y, pos.z])

    except Exception as e:
        # Keep the original exception attached as __cause__ for debugging.
        raise RuntimeError(f"Failed to parse CIF file {path}: {e}") from e

    if not O_coords:
        raise RuntimeError(f"No Oxygen atoms found in {path}")

    return np.asarray(O_coords, dtype=np.float64)

def load_and_align_geometries(paths: list) -> "tuple[np.ndarray, np.ndarray, np.ndarray, int]":
    """Load the H, I and J geometries and reduce each to its closest O-O pair.

    For each of the first three paths, all oxygen coordinates are loaded with
    ``get_oxygen_coords``, the two oxygens with the smallest mutual distance
    are selected, and that pair is translated so its centroid sits at the
    origin. No rotational alignment is performed; centering is the only
    transformation applied.

    Args:
        paths: Sequence with at least three file paths, in the order H, I, J.

    Returns:
        ``(coords_H, coords_I, coords_J, atoms_unified)`` where each
        ``coords_*`` is a centered (2, 3) float array and ``atoms_unified``
        is the constant 2 (atoms per pair).

    Raises:
        ValueError: If fewer than three paths are given, or a structure has
            fewer than two oxygen atoms.
    """
    if len(paths) < 3:
        raise ValueError("Three structure paths (H, I, J) are required.")

    def get_nearest_oo_pair(coords):
        """Return (pair_coords, distance) for the two closest rows of ``coords``."""
        coords = np.asarray(coords, dtype=np.float64)
        n = len(coords)
        if n < 2:
            raise ValueError("Not enough oxygen atoms to find a pair.")
        best_sq = np.inf
        pair_indices = (0, 1)
        # One vectorised pass per atom against all later atoms. The work is
        # still quadratic, but it runs inside NumPy instead of a Python
        # double loop, which matters for structures with many oxygens.
        for i in range(n - 1):
            diff = coords[i + 1:] - coords[i]
            sq = np.sum(diff * diff, axis=1)
            # NaN distances can never win the comparison; neutralise them so
            # argmin does not latch onto them.
            sq = np.where(np.isnan(sq), np.inf, sq)
            k = int(np.argmin(sq))
            # Strict "<" keeps the first pair encountered on ties.
            if sq[k] < best_sq:
                best_sq = sq[k]
                pair_indices = (i, i + 1 + k)
        # Fancy indexing returns a fresh array, so the in-place centering
        # below never touches the caller's data.
        return coords[list(pair_indices)], float(np.sqrt(best_sq))

    coords_H, dH = get_nearest_oo_pair(get_oxygen_coords(paths[0]))
    coords_I, dI = get_nearest_oo_pair(get_oxygen_coords(paths[1]))
    coords_J, dJ = get_nearest_oo_pair(get_oxygen_coords(paths[2]))

    # Center all pairs at (0,0,0) for stable interpolation
    coords_H -= coords_H.mean(axis=0, keepdims=True)
    coords_I -= coords_I.mean(axis=0, keepdims=True)
    coords_J -= coords_J.mean(axis=0, keepdims=True)

    # NOTE(review): the pairs are not rotated onto a common axis and the atom
    # order within each pair follows the input order. Interpolated distances
    # therefore depend on the relative orientation of the three structures.
    # Confirm this is acceptable for the intended inputs.

    print(f"[Geo] O–O Pair Distances: H={dH:.3f} Å, I={dI:.3f} Å, J={dJ:.3f} Å")
    # Only the O-O pair is tracked, so the atom count is fixed at two.
    atoms_unified = 2

    return coords_H, coords_I, coords_J, atoms_unified

def sigmoid(t, tau, x0=1.0):
    """Sigmoid function for morphing."""
    return 1.0 / (1.0 + np.exp(-(t / tau - x0)))

def mix(A, B, s):
    """Blend ``A`` and ``B`` linearly: ``s = 0`` gives ``A``, ``s = 1`` gives ``B``."""
    share_of_a = 1.0 - s
    from_a = A * share_of_a
    from_b = B * s
    return from_a + from_b

def pair_distance(P):
    """Return, as a Python float, the Euclidean norm of ``P[0] - P[1]``."""
    first, second = P[0], P[1]
    separation = np.linalg.norm(first - second)
    return float(separation)

def compute_run_id(cif_paths, params_dict) -> str:
    """Derive a short, reproducible run identifier from the code version and inputs.

    A SHA-256 hash is fed, in order, with: ``CODE_VERSION``; for each CIF
    path its base name followed by the hex digest of its content; and the
    key-sorted JSON dump of ``params_dict``. The first 16 hex characters of
    the digest are returned.
    """
    digest = hashlib.sha256(CODE_VERSION.encode())
    for cif in cif_paths:
        file_name = os.path.basename(cif).encode()
        digest.update(file_name)
        content_hash = sha256_of(cif).encode()
        digest.update(content_hash)
    serialized_params = json.dumps(params_dict, sort_keys=True)
    digest.update(serialized_params.encode())
    full_hex = digest.hexdigest()
    return full_hex[:16]  # short RUN_ID for filenames

def run_simulation_logic(XH, XI, XJ, T_K: float, dt_fs=0.5, steps=40000,
                       tau_fs=7000.0, center_fs=10000.0,
                       rng_seed=42, temp_noise_scale=0.01):
    """
    Mock simulation: morph the pair geometry H -> I -> J and record a J metric.

    For each of ``steps + 1`` time points (spaced ``dt_fs`` apart):
    - a morph parameter in [0, 2] is taken from a sigmoid in time,
    - the coordinates are interpolated (H->I below 1, I->J above 1),
    - Gaussian noise scaled by ``T_K / 300`` is added to the coordinates,
    - the O-O distance is measured from the resulting coordinates,
    - J is computed from the morph parameter, the distance and a noisy
      multiplicative factor, then clipped at zero from below.

    Returns the tuple of arrays ``(t_fs, s_total, J, d_OO)``.
    """
    # The generator is seeded from the base seed and the temperature, so each
    # temperature has its own reproducible noise stream.
    noise_source = np.random.default_rng(int(rng_seed + T_K * 100))

    # Noise widths do not change during the run.
    coord_sigma = 0.001 * (T_K / 300.0)
    metric_sigma = temp_noise_scale * (T_K / 300.0)

    times = []
    morph_values = []
    metric_values = []
    distances = []

    print(f"[Sim] Running T = {T_K} K...")

    for index in range(steps + 1):
        now_fs = index * dt_fs

        # Morph parameter: 0 (H) -> 1 (I) -> 2 (J)
        progress = np.clip(
            2.0 * sigmoid(now_fs, tau_fs, x0=(center_fs / tau_fs)), 0.0, 2.0
        )

        # Pick the segment of the H->I->J path the current parameter is on.
        if progress <= 1.0:
            start, end, fraction = XH, XI, progress
        else:
            start, end, fraction = XI, XJ, progress - 1.0
        frame = mix(start, end, fraction)

        # Coordinate noise is drawn first, the metric noise below second;
        # this order fixes how the random stream is consumed.
        frame += noise_source.normal(0.0, coord_sigma, frame.shape)

        # The O-O distance is measured from the noisy, interpolated frame.
        separation = pair_distance(frame)

        # J = (peak near structure I + distance-dependent term + base value)
        #     * (1 + temperature-scaled noise)
        peak = 0.5 * np.exp(-((progress - 1.05) / 0.15)**2)
        near_term = 0.5 * np.exp(-(separation - 1.45)**2 / 0.1)
        thermal_jitter = noise_source.normal(0.0, metric_sigma)
        metric = (peak + near_term + 0.1) * (1.0 + thermal_jitter)
        metric = np.clip(metric, 0.0, None)  # never below zero

        times.append(now_fs)
        morph_values.append(progress)
        metric_values.append(metric)
        distances.append(separation)

    return (np.array(times), np.array(morph_values),
            np.array(metric_values), np.array(distances))

def cdf_series(x: np.ndarray):
    """Build the empirical CDF of ``x``, ignoring NaN entries.

    Returns ``(sorted_values, cdf)``, where ``cdf[i]`` is ``(i + 1) / n`` for
    the ``n`` non-NaN values. Two empty arrays are returned when no value
    remains after dropping NaNs.
    """
    values = np.asarray(x, float)
    kept = values[np.logical_not(np.isnan(values))]
    count = kept.size
    if count == 0:
        return np.array([]), np.array([])
    ordered = np.sort(kept)
    ranks = np.arange(1, count + 1)
    return ordered, ranks / count

def main(argv=None):
    """Generate the complete veracity bundle for one run.

    Steps: parse arguments, load the three O-O pair geometries, derive the
    RUN_ID, simulate every temperature (timeseries CSV, two CDF CSVs, one
    plot), build the Arrhenius table and plot (only when pandas can be
    imported), write the JSON manifest and pack everything into a ZIP.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) lets argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 1 when the geometries cannot be loaded;
            argparse exits on its own for invalid arguments.
    """
    ap = argparse.ArgumentParser(description="GQR-XIV Single-Run Veracity ZIP Generator")
    ap.add_argument("--cifs", nargs=3, required=True, help="Three CIFs for H, I, J structures")
    ap.add_argument("--temps", nargs="+", type=int, default=[285, 295, 305, 315, 325], help="List of temperatures in Kelvin")
    ap.add_argument("--steps", type=int, default=20000, help="Number of simulation steps")
    ap.add_argument("--dtfs", type=float, default=1.0, help="Time step in femtoseconds")
    ap.add_argument("--seed", type=int, default=42, help="Base random seed")
    ap.add_argument("--tau_fs", type=float, default=7000.0, help="Morphing timescale (tau)")
    ap.add_argument("--center_fs", type=float, default=10000.0, help="Morphing timescale (center)")
    ap.add_argument("--temp_noise_scale", type=float, default=0.05, help="Scaling factor for temperature-dependent noise")

    # parse_known_args() tolerates arguments this parser does not define,
    # such as the "-f <connection-file>" that notebook kernels add. Whatever
    # is skipped is reported so that typos do not go unnoticed.
    args, unknown = ap.parse_known_args(argv)
    if unknown:
        print(f"[Info] Ignoring unrecognized arguments: {unknown}", file=sys.stderr)

    # Reject values that would only fail later with a division by zero
    # (1/T for the Arrhenius table, t/tau in the sigmoid) or yield no data.
    if any(T <= 0 for T in args.temps):
        ap.error("--temps must all be positive temperatures in Kelvin")
    if args.steps < 0:
        ap.error("--steps must be >= 0")
    if args.tau_fs == 0:
        ap.error("--tau_fs must be non-zero")

    # --- 1. Load Geometries ---
    try:
        XH, XI, XJ, atoms_unified = load_and_align_geometries(args.cifs)
    except Exception as e:
        print(f"[FATAL] Error loading geometries: {e}", file=sys.stderr)
        sys.exit(1)

    # --- 2. Compute RUN_ID ---
    params = dict(
        temps=args.temps, steps=args.steps, dt_fs=args.dtfs,
        seed=args.seed, atoms_unified=atoms_unified,
        tau_fs=args.tau_fs, center_fs=args.center_fs,
        temp_noise_scale=args.temp_noise_scale,
        code_version=CODE_VERSION, gpu_on=GPU_ON
    )
    RUN_ID = compute_run_id(args.cifs, params)
    print(f"[INIT] RUN_ID = {RUN_ID} (GPU={GPU_ON})")

    # --- 3. Setup Output Layout (names stamped with RUN_ID) ---
    root_dir = f"GQR14_TDSE_{RUN_ID}"
    runs_d = os.path.join(root_dir, "runs")
    der_d = os.path.join(root_dir, "derived")
    plot_d = os.path.join(root_dir, "plots")
    mani_d = os.path.join(root_dir, "manifests")
    for directory in (runs_d, der_d, plot_d, mani_d):
        os.makedirs(directory, exist_ok=True)

    arrhenius_rows = []
    all_output_files = []

    # --- 4. Run Simulation for each Temperature ---
    for T in args.temps:
        t, s, J, d = run_simulation_logic(
            XH, XI, XJ, T,
            dt_fs=args.dtfs, steps=args.steps,
            tau_fs=args.tau_fs, center_fs=args.center_fs,
            rng_seed=args.seed,
            temp_noise_scale=args.temp_noise_scale
        )
        base_name = f"HIJ_T{T}K_{RUN_ID}"

        # Save raw timeseries CSV (first line is a provenance comment,
        # second line the column names).
        csv_path = os.path.join(runs_d, f"{base_name}.csv")
        header = f"# RUN_ID={RUN_ID} CODE_VERSION={CODE_VERSION} T_K={T}\n"
        header += "t_fs,s_total,J_metric,d_OO"
        np.savetxt(csv_path, np.column_stack([t, s, J, d]),
                   delimiter=",", header=header, comments="")
        all_output_files.append(csv_path)

        # Save CDFs
        xsJ, cJ = cdf_series(J)
        xsd, cd = cdf_series(d)
        cdfJ_path = os.path.join(der_d, f"{base_name}_cdf_J.csv")
        cdfD_path = os.path.join(der_d, f"{base_name}_cdf_dOO.csv")
        np.savetxt(cdfJ_path, np.column_stack([xsJ, cJ]), delimiter=",", header="x,cdf", comments="")
        np.savetxt(cdfD_path, np.column_stack([xsd, cd]), delimiter=",", header="x,cdf", comments="")
        all_output_files.extend([cdfJ_path, cdfD_path])

        # Save Per-run quick plot (Timeseries)
        fig, ax1 = plt.subplots(figsize=(10, 5))
        ax1.set_xlabel("t (fs)")
        ax1.plot(t, J, "C0-", label="J (metric)")
        ax1.plot(t, s, "C1--", label="s_total (morph)")
        ax1.set_ylabel("J / s")
        ax2 = ax1.twinx()
        ax2.plot(t, d, "C2-", label="d(O-O) [Å]")
        ax2.set_ylabel("d(O-O) [Å]")
        fig.legend(loc='upper left', bbox_to_anchor=(0.1, 0.9))
        fig.suptitle(f"{base_name}")
        fig.tight_layout(rect=[0, 0, 1, 0.93])
        plot_path = os.path.join(plot_d, f"{base_name}_timeseries.png")
        # Save through the figure object rather than the implicit
        # "current figure" state.
        fig.savefig(plot_path, dpi=180)
        plt.close(fig)
        all_output_files.append(plot_path)

        # Add data for Arrhenius plot: average over the last 20 % of samples.
        tail = int(0.8 * len(J))
        J_mean = float(np.mean(J[tail:]))
        lnJ = (math.log(J_mean) if J_mean > 0 else float("nan"))
        arrhenius_rows.append(dict(
            T_K=T,
            invT_1overK=(1.0 / T),
            lnJ=lnJ,
            J_mean=J_mean,
            d_mean=float(np.mean(d[tail:])),
            RUN_ID=RUN_ID
        ))

    # --- 5. Generate Arrhenius Table and Plot ---
    try:
        import pandas as pd
        arr_df = pd.DataFrame(arrhenius_rows).sort_values("T_K")
        arr_csv_path = os.path.join(der_d, f"arrhenius_table_{RUN_ID}.csv")
        arr_df.to_csv(arr_csv_path, index=False)
        all_output_files.append(arr_csv_path)

        arr_png_path = os.path.join(plot_d, f"arrhenius_plot_{RUN_ID}.png")
        plt.figure(figsize=(7, 5))
        finite = arr_df.dropna(subset=["invT_1overK", "lnJ"])
        if len(finite) >= 2:
            x = finite["invT_1overK"].values
            y = finite["lnJ"].values
            # Least-squares line ln J = slope * (1/T) + intercept; the slope
            # is what the legend reports as -Ea/k.
            A = np.vstack([x, np.ones_like(x)]).T
            slope, intercept = np.linalg.lstsq(A, y, rcond=None)[0]
            yfit = slope * x + intercept
            plt.plot(x, y, "o", label="Data (mean of tail)")
            plt.plot(x, yfit, "-", label=f"Fit: -Ea/k = {slope:.3g} K")
            plt.xlabel("1/T (K⁻¹)")
            plt.ylabel("ln ⟨J_metric⟩ (a.u.)")
            plt.title(f"Arrhenius Plot — RUN_ID={RUN_ID}")
            plt.grid(True, alpha=0.3)
            plt.legend()
        else:
            plt.text(0.5, 0.5, "Need ≥ 2 temperatures for Arrhenius plot",
                     ha="center", va="center", transform=plt.gca().transAxes)
        plt.tight_layout()
        plt.savefig(arr_png_path, dpi=180)
        plt.close()
        all_output_files.append(arr_png_path)

    except ImportError:
        print("[Warning] Pandas not installed. Skipping Arrhenius table/plot generation.")

    # --- 6. Generate Manifest ---
    manifest_items = []
    for f_path in all_output_files:
        try:
            manifest_items.append({
                # Path relative to the bundle root. relpath only strips the
                # leading directory, unlike a substring replace.
                "file": os.path.relpath(f_path, root_dir),
                "sha256": sha256_of(f_path),
                "bytes": os.path.getsize(f_path)
            })
        except Exception as e:
            # Best effort: a file that cannot be hashed is left out of the
            # manifest but does not abort the run.
            print(f"[Warning] Could not hash file {f_path}: {e}")

    cif_hashes = [{"file": os.path.basename(p), "sha256": sha256_of(p)} for p in args.cifs]
    manifest = dict(
        run_id=RUN_ID,
        code_version=CODE_VERSION,
        created_utc=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        gpu_on=GPU_ON,
        parameters=params,
        input_cifs=cif_hashes,
        output_files=manifest_items
    )
    mani_json_path = os.path.join(mani_d, f"manifest_{RUN_ID}.json")
    with open(mani_json_path, "w") as f:
        json.dump(manifest, f, indent=2)

    # --- 7. Create Final ZIP Bundle ---
    outzip_path = f"GQR14_TDSE_veracity_{RUN_ID}.zip"
    print(f"\n[Packaging] Creating {outzip_path}...")
    with zipfile.ZipFile(outzip_path, "w", compression=zipfile.ZIP_DEFLATED) as z:
        # Add the manifest first (stored at the top of the bundle folder)
        z.write(mani_json_path, arcname=os.path.join(root_dir, f"manifest_{RUN_ID}.json"))
        # Add all other files under the same bundle folder
        for f_path in all_output_files:
            arc_name = os.path.join(root_dir, os.path.relpath(f_path, root_dir))
            z.write(f_path, arcname=arc_name)

    print(f"[SUCCESS] Wrote {outzip_path} (RUN_ID={RUN_ID})")

if __name__ == "__main__":
    # Notebook convenience: when the command line carries no --cifs flag and
    # the three default structure files exist in the working directory, the
    # flag and those file names are appended to sys.argv before main() runs.
    if '--cifs' not in sys.argv:
        print("[Info] No --cifs provided. Checking for 8F4H, 8F4I, 8F4J.cif locally...")
        cif_files = ["8F4H.cif", "8F4I.cif", "8F4J.cif"]
        missing = [name for name in cif_files if not os.path.exists(name)]
        if missing:
            print("[Info] Local CIFs not found. Proceeding (argparse will likely fail if --cifs is required).")
        else:
            print("[Info] Found local CIFs. Appending them to arguments.")
            sys.argv.extend(["--cifs"] + cif_files)

    main()

[FATAL] Error loading geometries: Failed to parse CIF file 8F4H.cif: orthogonalize(): incompatible function arguments. The following argument types are supported:
    1. orthogonalize(self, arg: gemmi.Fractional, /) -> gemmi.Position

Invoked with types: gemmi.UnitCell, gemmi.Position
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/tmp/ipython-input-2996884331.py", line 78, in get_oxygen_coords
    pos = cell.orthogonalize(atom.pos)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: orthogonalize(): incompatible function arguments. The following argument types are supported:
    1. orthogonalize(self, arg: gemmi.Fractional, /) -> gemmi.Position

Invoked with types: gemmi.UnitCell, gemmi.Position

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/tmp/ipython-input-2996884331.py", line 250, in main
    XH, XI, XJ, atoms_unified = load_and_align_geometries(args.cifs)
                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2996884331.py", line 121, in load_and_align_geometries
    coords_H, dH = get_nearest_oo_pair(get_oxygen_coords(paths[0]))
                                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-2996884331.py", line 97, in get_oxygen

TypeError: object of type 'NoneType' has no len()

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
GQR–XIV — Single-Run Veracity ZIP (atomic provenance)
- One command generates: per-T timeseries, CDFs, Arrhenius, manifest, ZIP
- All artifacts stamped with a single RUN_ID (SHA-256 over inputs+params+code_version)
- Fixes logical flaws: O-O distance is now a *result* of H-I-J morphing.
- Fixes gemmi TypeError for coordinate handling.

Usage:
1. Save this file as `gqr14_make_veracity_zip.py`.
2. Place your CIF files (e.g., 8F4H.cif, 8F4I.cif, 8F4J.cif) in the same directory.
3. Run from your terminal (or a notebook cell):
   !python gqr14_make_veracity_zip.py \
       --cifs 8F4H.cif 8F4I.cif 8F4J.cif \
       --temps 285 295 305 315 325 \
       --steps 20000 --dtfs 1.0
"""
import sys
import os
import json
import time
import math
import hashlib
import argparse
import subprocess
import zipfile
import numpy as np
import matplotlib.pyplot as plt

# --- Optional GPU (CuPy), silent fallback ---
try:
    import cupy as cp
    xp = cp
    GPU_ON = True
except ImportError:
    xp = np
    GPU_ON = False

# --- Ensure gemmi available (for CIF) ---
try:
    import gemmi
except ImportError:
    print("Gemmi not found, installing...", file=sys.stderr)
    subprocess.run([sys.executable, "-m", "pip", "install", "-q", "gemmi"], check=True)
    import gemmi

# This version string is part of the hash, ensuring code changes alter the RUN_ID
CODE_VERSION = "GQR14-TDSE-VERACITY-2.3-FIXED"

# ---------- Helpers ----------

def sha256_of(path: str) -> str:
    """Return the hex SHA-256 digest of the file at `path`.

    The file is read in 1 MiB pieces so large inputs are never held in
    memory all at once.
    """
    digest = hashlib.sha256()
    piece_size = 1 << 20
    with open(path, "rb") as stream:
        while True:
            piece = stream.read(piece_size)
            if not piece:
                break
            digest.update(piece)
    return digest.hexdigest()

def _macro_oxygen_coords(path: str) -> list:
    """Collect oxygen positions using gemmi's macromolecular reader (PDB/mmCIF)."""
    coords = []
    st = gemmi.read_structure(path)
    for model in st:
        for chain in model:
            for res in chain:
                for atom in res:
                    elem = atom.element.name.upper()
                    name = atom.name.upper()
                    if elem == 'O' or name.startswith('O'):
                        # atom.pos is already a gemmi.Position; passing it to
                        # cell.orthogonalize() raises TypeError (that method
                        # only accepts gemmi.Fractional), so use it directly.
                        pos = atom.pos
                        coords.append([pos.x, pos.y, pos.z])
    return coords


def _small_oxygen_coords(path: str) -> list:
    """Collect oxygen positions using gemmi's small-molecule CIF reader."""
    coords = []
    ss = gemmi.read_small_structure(path)
    cell = ss.cell
    for site in ss.sites:
        elem = (getattr(site, "type_symbol", "") or str(getattr(site, "element", ""))).upper()
        label = (getattr(site, "label", "") or "").upper()
        if elem == 'O' or label.startswith('O'):
            # Sites carry fractional coordinates, which is what
            # cell.orthogonalize() expects.
            pos = cell.orthogonalize(site.frac)
            coords.append([pos.x, pos.y, pos.z])
    return coords


def get_oxygen_coords(path: str) -> np.ndarray:
    """Loads all oxygen coordinates from a CIF/mmCIF file.

    The macromolecular reader is tried first. If it raises or yields no
    oxygen atoms, the small-molecule reader is tried instead. Results of a
    reader that failed part-way are discarded, so the returned array always
    comes from exactly one reader.

    Args:
        path: Path to the structure file.

    Returns:
        Array of shape (n_oxygen, 3), dtype float64, one row per atom/site
        selected by the oxygen filter.

    Raises:
        FileNotFoundError: `path` does not exist.
        RuntimeError: neither reader produced any oxygen coordinates (either
            because parsing failed or because no oxygen was present).
    """
    if not os.path.exists(path):
        raise FileNotFoundError(f"File not found: {path}")

    # Deliberately broad: any failure of the macromolecular reader just means
    # "try the small-molecule reader"; the error is kept for reporting.
    macro_error = None
    try:
        O_coords = _macro_oxygen_coords(path)
    except Exception as e:
        macro_error = e
        O_coords = []

    if not O_coords:
        try:
            O_coords = _small_oxygen_coords(path)
        except Exception as e2:
            macro_desc = macro_error if macro_error is not None else "parsed, but no oxygen atoms found"
            raise RuntimeError(
                f"Failed to parse CIF {path} with both methods. Macro error: {macro_desc}. Small error: {e2}"
            ) from e2

    if not O_coords:
        # Chain the macro error (if any) so it is not silently lost.
        raise RuntimeError(f"No Oxygen atoms found in {path}") from macro_error

    return np.asarray(O_coords, dtype=np.float64)


def _nearest_oo_pair(coords, min_pair_dist: float = 0.0):
    """Return (2x3 coordinate copy, distance) of the closest pair of points.

    Pairs closer than `min_pair_dist` are ignored. Each row of the search is
    vectorised with NumPy, so memory stays O(n) while the Python-level loop
    runs n-1 times instead of n*(n-1)/2.
    """
    coords = np.asarray(coords, dtype=np.float64)
    n = len(coords)
    if n < 2:
        raise ValueError("Not enough oxygen atoms to find a pair.")

    best_dist = np.inf
    best_pair = (0, 1)
    for i in range(n - 1):
        dists = np.linalg.norm(coords[i + 1:] - coords[i], axis=1)
        # Neutralise NaN distances and pairs below the cutoff so argmin
        # only ever selects a usable candidate.
        dists[np.isnan(dists) | (dists < min_pair_dist)] = np.inf
        k = int(np.argmin(dists))
        # Strict '<' keeps the first pair found on ties.
        if dists[k] < best_dist:
            best_dist = float(dists[k])
            best_pair = (i, i + 1 + k)

    if min_pair_dist > 0.0 and not np.isfinite(best_dist):
        raise ValueError(
            f"No oxygen pair separated by at least {min_pair_dist} was found."
        )

    # Fancy indexing returns a copy, so callers may modify it in place.
    return coords[list(best_pair)], best_dist


def load_and_align_geometries(paths: list, min_pair_dist: float = 0.0) -> tuple[np.ndarray, np.ndarray, np.ndarray, int]:
    """Loads H, I, J geometries, finds the nearest O-O pair, and centers them.

    Args:
        paths: At least three structure file paths; the first three are used
            as the H, I and J structures, in that order.
        min_pair_dist: Pairs closer than this distance are skipped when
            searching for the nearest O-O pair. The default of 0.0 keeps
            every pair (original behaviour). A small positive cutoff lets a
            caller skip near-coincident atoms; the captured run above
            reported pair distances of 0.012-0.020 Å.

    Returns:
        (coords_H, coords_I, coords_J, atoms_unified): three (2, 3) arrays,
        each holding one O-O pair centered on its own mean, and the atom
        count (always 2).

    Raises:
        ValueError: fewer than three paths, fewer than two oxygens in a
            structure, or no pair satisfies `min_pair_dist`.
    """
    if len(paths) < 3:
        raise ValueError(
            f"Expected three structure paths (H, I, J), got {len(paths)}."
        )

    coords_H, dH = _nearest_oo_pair(get_oxygen_coords(paths[0]), min_pair_dist)
    coords_I, dI = _nearest_oo_pair(get_oxygen_coords(paths[1]), min_pair_dist)
    coords_J, dJ = _nearest_oo_pair(get_oxygen_coords(paths[2]), min_pair_dist)

    # Center all pairs at (0,0,0) for stable interpolation
    coords_H -= coords_H.mean(axis=0, keepdims=True)
    coords_I -= coords_I.mean(axis=0, keepdims=True)
    coords_J -= coords_J.mean(axis=0, keepdims=True)

    # NOTE: no rotational alignment is performed; the pairs are only
    # centered. Real alignment is left as future work.

    print(f"[Geo] O–O Pair Distances: H={dH:.3f} Å, I={dI:.3f} Å, J={dJ:.3f} Å")
    # Only the O-O pair is tracked, so the unified atom count is fixed at 2.
    atoms_unified = 2

    return coords_H, coords_I, coords_J, atoms_unified

def sigmoid(t, tau, x0=1.0):
    """Logistic curve of the scaled time (t / tau), with its midpoint at x0."""
    shifted = t / tau - x0
    denominator = 1.0 + np.exp(-shifted)
    return 1.0 / denominator

def mix(A, B, s):
    """Blend A and B linearly: s = 0 gives A, s = 1 gives B."""
    share_of_a = 1.0 - s
    return A * share_of_a + B * s

def pair_distance(P):
    """Return the Euclidean distance between P[0] and P[1] as a Python float."""
    separation = P[0] - P[1]
    length = np.linalg.norm(separation)
    return float(length)

def compute_run_id(cif_paths, params_dict) -> str:
    """Derive a short, reproducible RUN_ID from code version, inputs and parameters.

    The hashed byte stream is, in order: CODE_VERSION, then for each CIF its
    base filename followed by the hex SHA-256 of its contents, then the
    parameters serialised as key-sorted JSON. The first 16 hex characters of
    the SHA-256 digest are returned.
    """
    pieces = [CODE_VERSION.encode()]
    for cif in cif_paths:
        pieces.append(os.path.basename(cif).encode())
        pieces.append(sha256_of(cif).encode())
    pieces.append(json.dumps(params_dict, sort_keys=True).encode())

    full_digest = hashlib.sha256(b"".join(pieces)).hexdigest()
    return full_digest[:16]  # short RUN_ID for filenames

def run_simulation_logic(XH, XI, XJ, T_K: float, dt_fs=0.5, steps=40000,
                       tau_fs=7000.0, center_fs=10000.0,
                       rng_seed=42, temp_noise_scale=0.01):
    """
    Runs the mock simulation for one temperature.

    For each of the steps + 1 time points:
    - a morph parameter in [0, 2] is taken from a sigmoid of time
      (0 -> XH, 1 -> XI, 2 -> XJ) and the coordinates are interpolated,
    - Gaussian coordinate noise scaled by T_K / 300 is added,
    - the O-O distance is measured from the resulting coordinates,
    - a J metric is built from a peak near morph = 1.05, a Gaussian in the
      O-O distance, a constant offset and multiplicative Gaussian noise.

    The generator is seeded with int(rng_seed + T_K * 100), so the output is
    deterministic for a given seed and temperature.

    Returns:
        (t, s, J, d): arrays of time (fs), morph parameter, J metric and
        O-O distance, one entry per time point.
    """
    # T-dependent RNG for deterministic noise
    rng = np.random.default_rng(int(rng_seed + T_K * 100))

    times, morphs, metrics, distances = [], [], [], []

    print(f"[Sim] Running T = {T_K} K...")

    # Noise widths depend only on temperature, so compute them once.
    coord_sigma = 0.001 * (T_K / 300.0)
    metric_sigma = temp_noise_scale * (T_K / 300.0)

    for step in range(steps + 1):
        now_fs = step * dt_fs

        # Morph parameter: 0 (H) -> 1 (I) -> 2 (J)
        morph = np.clip(
            2.0 * sigmoid(now_fs, tau_fs, x0=(center_fs / tau_fs)), 0.0, 2.0
        )

        # First half of the morph blends H->I, second half blends I->J.
        frame = mix(XH, XI, morph) if morph <= 1.0 else mix(XI, XJ, morph - 1.0)

        # Coordinate noise is drawn before the metric noise; this order
        # fixes the random stream and must not change.
        frame += rng.normal(0.0, coord_sigma, frame.shape)

        # The O-O distance is measured from the morphed geometry.
        o_o_dist = pair_distance(frame)

        peak_term = 0.5 * np.exp(-((morph - 1.05) / 0.15)**2)
        dist_term = 0.5 * np.exp(-(o_o_dist - 1.45)**2 / 0.1)

        relative_noise = rng.normal(0.0, metric_sigma)

        metric = (peak_term + dist_term + 0.1) * (1.0 + relative_noise)
        metric = np.clip(metric, 0.0, None)  # clamp: the metric is never negative

        times.append(now_fs)
        morphs.append(morph)
        metrics.append(metric)
        distances.append(o_o_dist)

    return (np.array(times), np.array(morphs), np.array(metrics), np.array(distances))

def cdf_series(x: np.ndarray):
    """Return (sorted values, empirical CDF) for a series, ignoring NaNs.

    The CDF value at the k-th smallest sample (1-based) is k / n. Two empty
    arrays are returned when no non-NaN samples remain.
    """
    samples = np.asarray(x, float)
    samples = samples[~np.isnan(samples)]
    count = samples.size
    if count == 0:
        return np.array([]), np.array([])
    ordered = np.sort(samples)
    fractions = np.arange(1, count + 1) / count
    return ordered, fractions

def main():
    """Command-line entry point: run every temperature and package the results.

    Steps:
      1. Parse arguments and load/center the H, I, J O-O pairs.
      2. Compute RUN_ID from the CIFs and the parameter set.
      3. Create the output tree GQR14_TDSE_<RUN_ID>/{runs,derived,plots,manifests}.
      4. For each temperature: run the simulation, write the timeseries CSV,
         the CDF CSVs and a timeseries plot, and collect an Arrhenius row
         from the last 20% of the J series.
      5. Write the Arrhenius table and plot (skipped if pandas is missing).
      6. Write a JSON manifest with SHA-256 and size of every output file.
      7. Bundle the manifest and all outputs into a ZIP.

    Exits with status 1 if the geometries cannot be loaded, and with
    argparse's usage error (status 2) on invalid numeric arguments.
    """
    ap = argparse.ArgumentParser(description="GQR-XIV Single-Run Veracity ZIP Generator")
    ap.add_argument("--cifs", nargs=3, required=True, help="Three CIFs for H, I, J structures")
    ap.add_argument("--temps", nargs="+", type=int, default=[285, 295, 305, 315, 325], help="List of temperatures in Kelvin")
    ap.add_argument("--steps", type=int, default=20000, help="Number of simulation steps")
    ap.add_argument("--dtfs", type=float, default=1.0, help="Time step in femtoseconds")
    ap.add_argument("--seed", type=int, default=42, help="Base random seed")
    ap.add_argument("--tau_fs", type=float, default=7000.0, help="Morphing timescale (tau)")
    ap.add_argument("--center_fs", type=float, default=10000.0, help="Morphing timescale (center)")
    ap.add_argument("--temp_noise_scale", type=float, default=0.05, help="Scaling factor for temperature-dependent noise")

    # parse_known_args() instead of parse_args(): a notebook kernel injects
    # its own arguments (like the -f from Jupyter), which must not abort the run.
    args, unknown = ap.parse_known_args()
    if unknown:
        # Report what was dropped so a mistyped option does not go unnoticed.
        print(f"[Warning] Ignoring unrecognized arguments: {unknown}", file=sys.stderr)

    # Fail fast on values that would otherwise break mid-run: 1/T and the
    # noise scale need T > 0, and the morph divides by tau_fs.
    bad_temps = [T for T in args.temps if T <= 0]
    if bad_temps:
        ap.error(f"--temps must be positive Kelvin values, got: {bad_temps}")
    if args.tau_fs == 0:
        ap.error("--tau_fs must be non-zero")

    # --- 1. Load Geometries ---
    try:
        XH, XI, XJ, atoms_unified = load_and_align_geometries(args.cifs)
    except Exception as e:
        print(f"[FATAL] Error loading geometries: {e}", file=sys.stderr)
        sys.exit(1)

    # --- 2. Compute RUN_ID ---
    # NOTE(review): gpu_on is part of the hashed parameters, so the same
    # inputs give a different RUN_ID depending on whether CuPy is importable.
    # Confirm this is intended.
    params = dict(
        temps=args.temps, steps=args.steps, dt_fs=args.dtfs,
        seed=args.seed, atoms_unified=atoms_unified,
        tau_fs=args.tau_fs, center_fs=args.center_fs,
        temp_noise_scale=args.temp_noise_scale,
        code_version=CODE_VERSION, gpu_on=GPU_ON
    )
    RUN_ID = compute_run_id(args.cifs, params)
    print(f"[INIT] RUN_ID = {RUN_ID} (GPU={GPU_ON})")

    # --- 3. Setup Output Layout (names stamped with RUN_ID) ---
    root_dir = f"GQR14_TDSE_{RUN_ID}"
    runs_d = os.path.join(root_dir, "runs")
    der_d = os.path.join(root_dir, "derived")
    plot_d = os.path.join(root_dir, "plots")
    mani_d = os.path.join(root_dir, "manifests")
    os.makedirs(runs_d, exist_ok=True)
    os.makedirs(der_d, exist_ok=True)
    os.makedirs(plot_d, exist_ok=True)
    os.makedirs(mani_d, exist_ok=True)

    arrhenius_rows = []
    all_output_files = []

    # --- 4. Run Simulation for each Temperature ---
    for T in args.temps:
        t, s, J, d = run_simulation_logic(
            XH, XI, XJ, T,
            dt_fs=args.dtfs, steps=args.steps,
            tau_fs=args.tau_fs, center_fs=args.center_fs,
            rng_seed=args.seed,
            temp_noise_scale=args.temp_noise_scale
        )
        base_name = f"HIJ_T{T}K_{RUN_ID}"

        # Save raw timeseries CSV
        csv_path = os.path.join(runs_d, f"{base_name}.csv")
        header = f"# RUN_ID={RUN_ID} CODE_VERSION={CODE_VERSION} T_K={T}\n"
        header += "t_fs,s_total,J_metric,d_OO"
        np.savetxt(csv_path, np.column_stack([t, s, J, d]),
                   delimiter=",", header=header, comments="")
        all_output_files.append(csv_path)

        # Save CDFs
        xsJ, cJ = cdf_series(J)
        xsd, cd = cdf_series(d)
        cdfJ_path = os.path.join(der_d, f"{base_name}_cdf_J.csv")
        cdfD_path = os.path.join(der_d, f"{base_name}_cdf_dOO.csv")
        np.savetxt(cdfJ_path, np.column_stack([xsJ, cJ]), delimiter=",", header="x,cdf", comments="")
        np.savetxt(cdfD_path, np.column_stack([xsd, cd]), delimiter=",", header="x,cdf", comments="")
        all_output_files.extend([cdfJ_path, cdfD_path])

        # Save Per-run quick plot (Timeseries)
        fig, ax1 = plt.subplots(figsize=(10, 5))
        ax1.set_xlabel("t (fs)")
        ax1.plot(t, J, "C0-", label="J (metric)")
        ax1.plot(t, s, "C1--", label="s_total (morph)")
        ax1.set_ylabel("J / s")
        ax2 = ax1.twinx()
        ax2.plot(t, d, "C2-", label="d(O-O) [Å]")
        ax2.set_ylabel("d(O-O) [Å]")
        fig.legend(loc='upper left', bbox_to_anchor=(0.1, 0.9))
        fig.suptitle(f"{base_name}")
        fig.tight_layout(rect=[0, 0, 1, 0.93])
        plot_path = os.path.join(plot_d, f"{base_name}_timeseries.png")
        # Save through the figure object rather than pyplot's implicit
        # "current figure".
        fig.savefig(plot_path, dpi=180)
        plt.close(fig)
        all_output_files.append(plot_path)

        # Add data for Arrhenius plot (mean over the last 20% of samples)
        tail = int(0.8 * len(J))
        J_mean = float(np.mean(J[tail:]))
        lnJ = (math.log(J_mean) if J_mean > 0 else float("nan"))
        arrhenius_rows.append(dict(
            T_K=T,
            invT_1overK=(1.0 / T),
            lnJ=lnJ,
            J_mean=J_mean,
            d_mean=float(np.mean(d[tail:])),
            RUN_ID=RUN_ID
        ))

    # --- 5. Generate Arrhenius Table and Plot ---
    try:
        import pandas as pd
        arr_df = pd.DataFrame(arrhenius_rows).sort_values("T_K")
        arr_csv_path = os.path.join(der_d, f"arrhenius_table_{RUN_ID}.csv")
        arr_df.to_csv(arr_csv_path, index=False)
        all_output_files.append(arr_csv_path)

        arr_png_path = os.path.join(plot_d, f"arrhenius_plot_{RUN_ID}.png")
        plt.figure(figsize=(7, 5))
        finite = arr_df.dropna(subset=["invT_1overK", "lnJ"])
        if len(finite) >= 2:
            x = finite["invT_1overK"].values
            y = finite["lnJ"].values
            # Least-squares straight line: lnJ = slope * (1/T) + intercept
            A = np.vstack([x, np.ones_like(x)]).T
            slope, intercept = np.linalg.lstsq(A, y, rcond=None)[0]
            yfit = slope * x + intercept
            Ea_over_k = -slope
            plt.plot(x, y, "o", label="Data (mean of tail)")
            # The label shows -Ea/k, i.e. the fitted slope itself.
            plt.plot(x, yfit, "-", label=f"Fit: -Ea/k = {-Ea_over_k:.3g} K")
            plt.xlabel("1/T (K⁻¹)")
            plt.ylabel("ln ⟨J_metric⟩ (a.u.)")
            plt.title(f"Arrhenius Plot — RUN_ID={RUN_ID}")
            plt.grid(True, alpha=0.3)
            plt.legend()
        else:
            plt.text(0.5, 0.5, "Need ≥ 2 temperatures for Arrhenius plot",
                     ha="center", va="center", transform=plt.gca().transAxes)
        plt.tight_layout()
        plt.savefig(arr_png_path, dpi=180)
        plt.close()
        all_output_files.append(arr_png_path)

    except ImportError:
        print("[Warning] Pandas not installed. Skipping Arrhenius table/plot generation.")

    # --- 6. Generate Manifest ---
    manifest_items = []
    for f_path in all_output_files:
        try:
            manifest_items.append({
                # Path relative to the run's root directory.
                "file": os.path.relpath(f_path, root_dir),
                "sha256": sha256_of(f_path),
                "bytes": os.path.getsize(f_path)
            })
        except Exception as e:
            # Best effort: a file that cannot be hashed is left out of the
            # manifest, but the run continues.
            print(f"[Warning] Could not hash file {f_path}: {e}")

    cif_hashes = [{"file": os.path.basename(p), "sha256": sha256_of(p)} for p in args.cifs]
    manifest = dict(
        run_id=RUN_ID,
        code_version=CODE_VERSION,
        created_utc=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        gpu_on=GPU_ON,
        parameters=params,
        input_cifs=cif_hashes,
        output_files=manifest_items
    )
    mani_json_path = os.path.join(mani_d, f"manifest_{RUN_ID}.json")
    with open(mani_json_path, "w") as f:
        json.dump(manifest, f, indent=2)

    # --- 7. Create Final ZIP Bundle ---
    outzip_path = f"GQR14_TDSE_veracity_{RUN_ID}.zip"
    print(f"\n[Packaging] Creating {outzip_path}...")
    with zipfile.ZipFile(outzip_path, "w", compression=zipfile.ZIP_DEFLATED) as z:
        # Add the manifest first; inside the archive it sits directly under
        # the root directory (on disk it lives in manifests/).
        z.write(mani_json_path, arcname=os.path.join(root_dir, f"manifest_{RUN_ID}.json"))
        # Add all other files
        for f_path in all_output_files:
            # Archive names keep the root_dir prefix (relative to its parent).
            arc_name = os.path.relpath(f_path, os.path.dirname(root_dir))
            z.write(f_path, arcname=arc_name)

    print(f"[SUCCESS] Wrote {outzip_path} (RUN_ID={RUN_ID})")

if __name__ == "__main__":
    # Notebook convenience: when the caller did not pass --cifs and the three
    # default structure files are present in the working directory, add them
    # to sys.argv before main() parses it.
    if '--cifs' not in sys.argv:
        print("[Info] No --cifs provided. Checking for 8F4H, 8F4I, 8F4J.cif locally...")
        default_cifs = ["8F4H.cif", "8F4I.cif", "8F4J.cif"]
        any_missing = any(not os.path.exists(name) for name in default_cifs)
        if any_missing:
            print("[Info] Local CIFs not found. Proceeding (argparse will likely fail if --cifs is required).")
        else:
            print("[Info] Found local CIFs. Appending them to arguments.")
            sys.argv.extend(["--cifs"] + default_cifs)

    main()

[Geo] O–O Pair Distances: H=0.012 Å, I=0.020 Å, J=0.019 Å
[INIT] RUN_ID = 42c7a15fa5f3527a (GPU=True)
[Sim] Running T = 285 K...
[Sim] Running T = 295 K...
[Sim] Running T = 305 K...
[Sim] Running T = 315 K...
[Sim] Running T = 325 K...

[Packaging] Creating GQR14_TDSE_veracity_42c7a15fa5f3527a.zip...
[SUCCESS] Wrote GQR14_TDSE_veracity_42c7a15fa5f3527a.zip (RUN_ID=42c7a15fa5f3527a)


In [None]:
!pip install gemmi

Collecting gemmi
  Downloading gemmi-0.7.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (2.3 kB)
Downloading gemmi-0.7.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (2.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: gemmi
Successfully installed gemmi-0.7.3
