<a href="https://colab.research.google.com/github/jamessutton600613-png/GC/blob/main/Untitled225.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U gemmi

Collecting gemmi
  Downloading gemmi-0.7.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (2.3 kB)
Downloading gemmi-0.7.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (2.6 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m2.6/2.6 MB[0m [31m26.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: gemmi
Successfully installed gemmi-0.7.3


In [None]:
# ================================================================
# Geometry from CIF → focusing on the Mn4CaO5 cluster (Robust Version)
# ================================================================
from pathlib import Path
import numpy as np
import sys, subprocess

# --- inputs you can tweak ---
# Structure file handed to extract_oec_geometry() below.
CIF_PATH        = "8F4D.cif"
# Å; upper distance bound for a graph edge (the graph-building loop below also
# drops pairs that are not farther apart than 0.5).
PAIR_CUTOFF_A   = 6.0             # Å; graph edges cutoff

def _ensure_gemmi(auto_install=False):
    try:
        import gemmi
        return gemmi
    except ImportError:
        if auto_install:
            print("üîß Installing gemmi‚Ä¶")
            subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "gemmi", "--quiet"])
            import gemmi
            return gemmi
        raise ImportError("gemmi not installed. Run: !pip install -U gemmi")

def extract_oec_geometry(cif_path: str, n_waters: int = 4):
    """Extract coordinates and labels for the OEC cluster plus its nearest waters.

    The first residue (in chain/residue iteration order of the first model) that
    contains a manganese atom is taken as the cluster. Water residues are ranked
    by the distance from their first oxygen atom to the centroid of the cluster
    atoms, and every atom of the ``n_waters`` closest water residues is appended
    after the cluster atoms.

    Parameters
    ----------
    cif_path : str
        Path of the structure file passed to ``gemmi.read_structure``.
    n_waters : int, default 4
        Number of closest water residues to include. If the structure holds
        fewer waters, all of them are used.

    Returns
    -------
    coords : numpy.ndarray
        Array of shape (N, 3), dtype float32, cluster atoms first, then waters.
    labels : list of str
        One label per row of ``coords``: capitalised element symbol followed by a
        running per-element counter (``"Mn1"``, ``"O7"``, ...).

    Raises
    ------
    ValueError
        If ``n_waters`` is negative or no manganese atom is found.
    ImportError
        If ``gemmi`` is not installed (raised by ``_ensure_gemmi``).
    """
    if n_waters < 0:
        raise ValueError(f"n_waters must be >= 0, got {n_waters}")

    gemmi = _ensure_gemmi()
    print(f"Reading CIF file: {cif_path}...")
    st = gemmi.read_structure(str(cif_path))
    model = st[0]

    def _xyz(atom):
        # Cartesian position as a float32 3-vector.
        p = atom.pos
        return np.array([p.x, p.y, p.z], dtype=np.float32)

    # --- Find the OEC residue: first residue containing a manganese atom ---
    oec_residue = next(
        (res for chain in model for res in chain
         if any(atom.element.name.upper() == 'MN' for atom in res)),
        None,
    )
    if oec_residue is None:
        raise ValueError("Could not find any Manganese (MN) atoms to locate the OEC cluster.")

    print(f"Successfully located the OEC cluster. Residue name: '{oec_residue.name}'")

    # --- Cluster atoms ---
    positions = [_xyz(atom) for atom in oec_residue]
    elements = [atom.element.name.upper() for atom in oec_residue]

    # Centroid of the cluster atoms only (computed before any water is appended).
    oec_centroid = np.mean(np.array(positions), axis=0)

    # --- Rank water residues by the distance of their first oxygen to the centroid ---
    water_resn = {"HOH", "WAT", "H2O"}
    ranked_waters = []  # (distance, residue)
    for chain in model:
        for res in chain:
            if res.name.upper() not in water_resn:
                continue
            oxygen = next((atom for atom in res if atom.element.name == 'O'), None)
            if oxygen is None:
                continue
            ranked_waters.append((np.linalg.norm(_xyz(oxygen) - oec_centroid), res))
    ranked_waters.sort(key=lambda entry: entry[0])  # stable: ties keep file order

    closest_water_residues = [res for _, res in ranked_waters[:n_waters]]
    print(f"Identified the {len(closest_water_residues)} closest water molecules.")

    # --- Append every atom of the selected waters after the cluster atoms ---
    for res in closest_water_residues:
        for atom in res:
            positions.append(_xyz(atom))
            elements.append(atom.element.name.upper())

    coords = np.array(positions)

    # --- Create labels: element symbol + running count per element ---
    counts = {}
    labels = []
    for el in elements:
        key = el.capitalize()
        counts[key] = counts.get(key, 0) + 1
        labels.append(f"{key}{counts[key]}")

    print("Geometry extraction for the cluster is complete.")
    return coords, labels

# ---- build graph ----
nodes, labels = extract_oec_geometry(CIF_PATH)
N = len(nodes)

# Every unordered atom pair (i < j) whose separation r satisfies
# 0.5 < r <= PAIR_CUTOFF_A becomes an edge (i, j, r).
edge_list = []
for i in range(N):
    for j in range(i + 1, N):
        r = np.linalg.norm(nodes[i] - nodes[j])
        if 0.5 < r <= PAIR_CUTOFF_A:
            edge_list.append((i, j, float(r)))
# Object dtype keeps the integer indices and the float distance side by side.
pairs = np.array(edge_list, dtype=object)

# ---- summary print ----
print("-" * 50)
print(f"✅ OEC geometry: N={N} atoms  edges={len(pairs)} (cutoff {PAIR_CUTOFF_A} Å)")
print(f"   Atoms found: {sorted(labels)}")

Reading CIF file: 8F4D.cif...
Successfully located the OEC cluster. Residue name: 'OEY'
Identified the 4 closest water molecules.
Geometry extraction for the cluster is complete.
--------------------------------------------------
✅ OEC geometry: N=18 atoms  edges=129 (cutoff 6.0 Å)
   Atoms found: ['Ca1', 'Mn1', 'Mn2', 'Mn3', 'Mn4', 'O1', 'O10', 'O11', 'O12', 'O13', 'O2', 'O3', 'O4', 'O5', 'O6', 'O7', 'O8', 'O9']


In [None]:
# --- basic constants ---
# run_variant builds each edge amplitude as T0 * exp(-beta0 * r) * gain * (1 + jitter).
T0 = 1.0
beta0 = 1.0
# run_variant uses t = arange(N_STEPS) * DT_FS, so with the values below the time
# axis has 600 samples running from 0 to 299.5.
DT_FS = 0.5
N_STEPS = 600
# In the code shown in this notebook, SPECIES and E_eV are only used to build the
# saved file name state_{SPECIES}_E{E_eV:.1f}_{name}.npz.
SPECIES = "OEC"
E_eV = 1.0
# Snapshot keys. run_variant matches them against int(t) (elapsed time truncated to an
# integer), not against the step index, so with the current DT_FS / N_STEPS the values
# 400 and 600 lie beyond the end of the time axis and are never captured.
# NOTE(review): the name says "steps" but the comparison is made in time units — confirm intent.
SNAP_STEPS = {0,10,50,100,200,400,600}

# --- simple gain function (safe placeholder) ---
def gqr_gain(r, occ_ij, pars):
    """Placeholder gain factor ``1 + ALPHA * occ_ij``.

    ``r`` is accepted for interface compatibility but does not enter the result.
    ``pars`` is a mapping; its ``"ALPHA"`` entry (default 0.8) scales the
    occupancy term. Both ``ALPHA`` and ``occ_ij`` are coerced with ``float``.
    """
    strength = float(pars.get("ALPHA", 0.8))
    occupancy = float(occ_ij)
    return 1.0 + strength * occupancy

# --- variants used by the TDSE runner ---
# Maps a variant name to the parameter dict that is handed to gqr_gain() as `pars`.
# All three entries currently carry the same ALPHA, so gqr_gain treats them identically:
# in this notebook the "dry" run differs only because it is launched with
# include_waters=False, while "h2o" and "h2s" are launched with identical arguments.
# NOTE(review): presumably placeholders until per-variant parameters are filled in — confirm.
VARIANTS = {
    "dry": {"ALPHA": 0.8},
    "h2o": {"ALPHA": 0.8},
    "h2s": {"ALPHA": 0.8},
}

In [None]:
# ================================================================
# ✅ GQR–TDSE (subgraph-safe) with correct unitary + visible output
# - Uses local indices (pairs_sub) → no OOB
# - Corrects propagator: exp(-i E Δt) (no more early plateaus)
# - Hardens OUT_DIR handling; prints status; saves + plots
# NOTE: Assumes globals already defined elsewhere in your notebook:
#   nodes, labels, groups, pairs, jitter, T0, beta0, DT_FS, N_STEPS,
#   SPECIES, E_eV, VARIANTS (dry/h2o/h2s dicts), SNAP_STEPS, N
#   and gain function gqr_gain(r, occ_ij, pars).
# ================================================================
import os, numpy as np, matplotlib.pyplot as plt
from pathlib import Path

# ---------------- Path safety (handles corrupted OUT_DIR) ----------------
DEFAULT_OUT_DIR = "OEC_GQR_TDSE_MOVIE"
try:
    if not isinstance(OUT_DIR, (str, bytes, os.PathLike)):
        OUT_DIR = DEFAULT_OUT_DIR
except Exception:
    OUT_DIR = DEFAULT_OUT_DIR
OUT_DIR_PATH = Path(str(OUT_DIR))
OUT_DIR_PATH.mkdir(parents=True, exist_ok=True)
print(f"üìÇ Output dir: {OUT_DIR_PATH.resolve()}")

# ---------------- Variant runner (subgraph-aware) ----------------
def run_variant(name, pars, include_waters=True):
    """Propagate a single-site initial state on the (sub)graph and record group populations.

    Reads these notebook globals: ``N``, ``nodes``, ``labels``, ``groups``, ``pairs``,
    ``jitter``, ``T0``, ``beta0``, ``DT_FS``, ``N_STEPS``, ``SNAP_STEPS``, ``SPECIES``,
    ``E_eV``, ``OUT_DIR_PATH`` and ``gqr_gain``. ``groups`` and ``jitter`` must have
    been defined by an earlier cell.

    Parameters
    ----------
    name : str
        Variant tag; used in the status line and in the output file name.
    pars : dict
        Parameter mapping forwarded to ``gqr_gain``.
    include_waters : bool, default True
        When False, nodes whose group is ``'water'`` are removed and the edges
        are re-indexed to the remaining nodes.

    Returns
    -------
    tuple
        ``(t, pop_metal, pop_muoxo, pop_water, (nodes_sub, labels_sub, groups_sub, snaps))``
        where the first four items are float arrays of length ``N_STEPS`` and
        ``snaps`` maps an integer time key to a copy of ``|psi|**2``.

    Raises
    ------
    IndexError
        If there is neither a node labelled ``"Mn1"`` nor any ``'metal'`` node.

    Notes
    -----
    Writes ``state_{SPECIES}_E{E_eV:.1f}_{name}.npz`` into ``OUT_DIR_PATH``. The
    ``labels``, ``groups``, ``snaps`` and ``pairs`` entries are object arrays, so
    reading them back requires ``np.load(..., allow_pickle=True)``.
    """
    # --- choose active nodes (remove waters if dry) ---
    if include_waters:
        active = list(range(N))
    else:
        active = [i for i, g in enumerate(groups) if g != 'water']
    imap = {old: new for new, old in enumerate(active)}  # global → local index

    nodes_sub  = nodes[active]
    labels_sub = [labels[i] for i in active]
    groups_sub = [groups[i] for i in active]

    # --- subgraph edges with LOCAL endpoints, remembering each edge's position in `pairs` ---
    kept = [(k, imap[i], imap[j], r)
            for k, (i, j, r) in enumerate(pairs)
            if i in imap and j in imap]
    pairs_sub = np.array([(i, j, r) for (_, i, j, r) in kept], dtype=object)

    # Per-edge factors that do not depend on psi are computed once.
    # The jitter entry is looked up by the edge's index in the full `pairs` array so an
    # edge keeps the same jitter value whether or not other nodes were removed.
    # NOTE(review): assumes `jitter` is ordered like `pairs` — confirm against the cell
    # that creates it. Edges beyond len(jitter) get no jitter, as before.
    edges = []
    for k_global, i, j, r in kept:
        Jk = jitter[k_global] if k_global < len(jitter) else 0.0
        edges.append((i, j, r, T0 * np.exp(-beta0 * r), 1.0 + Jk))

    # --- local Hamiltonian using local indices ---
    def build_H_sub(psi):
        amp = np.abs(psi)
        # Pairwise amplitude products scaled by the largest site amplitude squared.
        occ = np.outer(amp, amp) / (amp.max()**2 + 1e-15)
        L = len(psi)
        H = np.zeros((L, L), complex)
        for i, j, r, decay, jitter_factor in edges:
            tij = decay * gqr_gain(r, occ[i, j], pars) * jitter_factor
            H[i, j] = tij
            H[j, i] = np.conj(tij)
        return H

    # --- unitary propagator: exp(-i E Δt) in the eigenbasis of H ---
    def step_sub(psi):
        psi = np.asarray(psi, dtype=np.complex128)
        Hs = build_H_sub(psi)                    # Hermitian by construction
        E, V = np.linalg.eigh(Hs)                # H = V diag(E) V†
        # NOTE(review): no ħ factor appears, so E and DT_FS are multiplied as plain
        # numbers — confirm the intended unit system.
        phase = np.exp(-1j * E * DT_FS)
        U = V @ np.diag(phase) @ V.conj().T
        psi = U @ psi
        return psi / (np.linalg.norm(psi) + 1e-15)

    idx_m = [i for i, g in enumerate(groups_sub) if g == 'metal']
    idx_o = [i for i, g in enumerate(groups_sub) if g == 'mu-oxo']
    idx_w = [i for i, g in enumerate(groups_sub) if g == 'water']

    # --- start on Mn1 if present else first metal ---
    if "Mn1" in labels_sub:
        start = labels_sub.index("Mn1")
    elif idx_m:
        start = idx_m[0]
    else:
        raise IndexError("no node labelled 'Mn1' and no 'metal' node to start from")
    psi = np.zeros(len(active), complex)
    psi[start] = 1.0

    t_fs = np.arange(N_STEPS) * DT_FS

    pop_m, pop_o, pop_w, snaps = [], [], [], {}

    print(f"   ▶ running {name}… nodes={len(active)}, pairs={len(pairs_sub)}")
    for n in range(N_STEPS):
        a2 = np.abs(psi)**2
        pop_m.append(np.sum(a2[idx_m]) if idx_m else 0.0)
        pop_o.append(np.sum(a2[idx_o]) if idx_o else 0.0)
        pop_w.append(np.sum(a2[idx_w]) if idx_w else 0.0)
        # Several consecutive steps can share the same truncated time when DT_FS < 1;
        # keep the first one so that e.g. key 0 is the initial state rather than a
        # later state that overwrote it.
        snap_key = int(t_fs[n])
        if snap_key in SNAP_STEPS and snap_key not in snaps:
            snaps[snap_key] = a2.copy()
        psi = step_sub(psi)

    save_path = OUT_DIR_PATH / f"state_{SPECIES}_E{E_eV:.1f}_{name}.npz"
    np.savez_compressed(
        str(save_path),
        nodes=nodes_sub,
        labels=np.array(labels_sub, object),
        groups=np.array(groups_sub, object),
        t=t_fs,
        pop_metal=np.array(pop_m, float),
        pop_muoxo=np.array(pop_o, float),
        pop_water=np.array(pop_w, float),
        snaps=snaps,
        pairs=pairs_sub,
    )
    print(f"   💾 saved {save_path.name}")
    return np.array(t_fs, float), np.array(pop_m, float), np.array(pop_o, float), np.array(pop_w, float), (nodes_sub, labels_sub, groups_sub, snaps)

# ---------------- Run all three variants ----------------
# Each call returns (t, metals, mu-oxo, waters, package); only the dry run drops the waters.
print("▶ DRY (no waters)…")
t_dry, m_dry, o_dry, w_dry, dry_pkg = run_variant("dry", VARIANTS["dry"], include_waters=False)
print("▶ H2O (hydrated)…")
t_h2o, m_h2o, o_h2o, w_h2o, h2o_pkg = run_variant("h2o", VARIANTS["h2o"], include_waters=True)
print("▶ H2S (sulfidated)…")
t_h2s, m_h2s, o_h2s, w_h2s, h2s_pkg = run_variant("h2s", VARIANTS["h2s"], include_waters=True)

# ---------------- Quick sanity prints ----------------
def _shape(x):
    try: return tuple(x.shape)
    except: return type(x).__name__
# One status line per variant with the shapes of its time axis and population series.
for _tag, (_t_run, _m_run, _o_run, _w_run) in (
    ("DRY", (t_dry, m_dry, o_dry, w_dry)),
    ("H2O", (t_h2o, m_h2o, o_h2o, w_h2o)),
    ("H2S", (t_h2s, m_h2s, o_h2s, w_h2s)),
):
    print(f"✔ {_tag}:  t={_shape(_t_run)} m={_shape(_m_run)} o={_shape(_o_run)} w={_shape(_w_run)}")

# ---------------- List saved NPZ files ----------------
saved = sorted(OUT_DIR_PATH.glob("state_*.npz"))
if saved:
    print("📄 Saved files:")
    for npz_file in saved:
        try:
            print(f"   - {npz_file.name}  ({npz_file.stat().st_size/1024:.1f} KB)")
        except OSError:
            # Best effort: the size is informational, so still list the file name.
            print(f"   - {npz_file.name}")
else:
    print("⚠ No NPZ files found (unexpected).")

# ---------------- Plots: 0‚Äì50 fs and full window (safe variable names) ----------------
def _clip(t, *ys, limit=50.0):
    k = np.searchsorted(t, limit) + 1
    return (t[:k],) + tuple(y[:k] for y in ys)

# 0–50 fs window: each variant is clipped against its own time axis
t50,   m50_d, o50_d, w50_d = _clip(t_dry, m_dry, o_dry, w_dry, limit=50.0)
t50_h, m50_h, o50_h, w50_h = _clip(t_h2o, m_h2o, o_h2o, w_h2o, limit=50.0)
t50_s, m50_s, o50_s, w50_s = _clip(t_h2s, m_h2s, o_h2s, w_h2s, limit=50.0)


def _population_curves(dry, h2o, h2s):
    """Build the (t, y, line width, legend label) list shared by both population figures.

    ``dry`` is a ``(t, metals, mu_oxo)`` triple; ``h2o`` and ``h2s`` are
    ``(t, metals, mu_oxo, waters)`` quadruples.
    """
    t_d, m_d, o_d = dry
    t_h, m_h, o_h, w_h = h2o
    t_s, m_s, o_s, w_s = h2s
    return [
        (t_d, m_d, 2.2, 'DRY: metals'),
        (t_d, o_d, 1.8, 'DRY: μ-oxo'),
        (t_h, m_h, 2.2, 'H₂O: metals'),
        (t_h, o_h, 1.8, 'H₂O: μ-oxo'),
        (t_h, w_h, 1.8, 'H₂O: waters'),
        (t_s, m_s, 2.2, 'H₂S: metals'),
        (t_s, o_s, 1.8, 'H₂S: μ-oxo'),
        (t_s, w_s, 1.8, 'H₂S: waters'),
    ]


def _plot_populations(curves, title, png_path):
    """Draw one population-vs-time figure, save it to ``png_path`` and show it."""
    fig, ax = plt.subplots(figsize=(9, 5))
    for t_axis, series, width, legend_label in curves:
        ax.plot(t_axis, series, lw=width, label=legend_label)
    ax.set_xlabel("Time (fs)")
    ax.set_ylabel("Population ∑|ψ|²")
    ax.set_title(title)
    ax.grid(alpha=0.3)
    ax.legend(ncol=3, fontsize=9)
    fig.tight_layout()
    fig.savefig(png_path, dpi=140)
    plt.show()
    print(f"📈 saved: {png_path.name}")


def _running_mean(series):
    """Cumulative mean: element k is the average of ``series[:k+1]``."""
    return np.cumsum(series) / np.arange(1, len(series) + 1)


short_png = OUT_DIR_PATH / "pop_0_50fs_compare.png"
_plot_populations(
    _population_curves(
        (t50, m50_d, o50_d),
        (t50_h, m50_h, o50_h, w50_h),
        (t50_s, m50_s, o50_s, w50_s),
    ),
    "GQR–TDSE: Populations (first 50 fs)",
    short_png,
)

# ---------- Full window (use the FULL arrays) ----------
full_curves = _population_curves(
    (t_dry, m_dry, o_dry),
    (t_h2o, m_h2o, o_h2o, w_h2o),
    (t_h2s, m_h2s, o_h2s, w_h2s),
)
# Sanity checks to catch mismatches early
for _t_axis, _series, _width, _curve_label in full_curves:
    assert hasattr(_series, "shape") and _t_axis.shape == _series.shape, \
        f"Shape mismatch for {_curve_label}: t={_t_axis.shape}, y={getattr(_series, 'shape', None)}"

# The title reports the window actually simulated (last time sample), which is
# (N_STEPS - 1) * DT_FS rather than a fixed number.
_t_end = float(t_dry[-1]) if len(t_dry) else 0.0
full_png = OUT_DIR_PATH / "pop_full_compare.png"
_plot_populations(full_curves, f"GQR–TDSE: Populations (0–{_t_end:g} fs)", full_png)

# --- Final resonance envelope diagnostic with transparency & legend clarity ---
# Running time-average of the water population for each variant.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(t_h2o, _running_mean(w_h2o), color='limegreen', lw=2.4, alpha=0.9, label='H₂O avg')
ax.plot(t_h2s, _running_mean(w_h2s), color='gray', lw=2.0, alpha=0.8, label='H₂S avg')
ax.plot(t_dry, _running_mean(w_dry), color='orange', lw=2.0, alpha=0.9, label='DRY avg')
ax.set_xlabel("Time (fs)")
ax.set_ylabel("⟨|ψ|²⟩")
ax.set_title("Positive Control: H₂O Resonance Envelope vs. DRY / H₂S")
ax.grid(alpha=0.3)
ax.legend()
fig.tight_layout()
plt.show()