<a href="https://colab.research.google.com/github/jamessutton600613-png/GC/blob/main/Untitled257.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q gemmi pyscf

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa
import gemmi
from pyscf import gto, dft
from pyscf.dft import numint
import hashlib, os, json

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m73.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.3/51.3 MB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# ============================
# HOMO BLOCK 2 — FUNCTION DEFINITIONS
# ============================

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa
from pyscf import gto, dft
from pyscf.dft import numint
import gemmi
import hashlib

def homo_cloud(mol, mf,
               box=4.0, ngrid=40, keep_percent=97.0,
               max_points=5000):
    """Sample the HOMO on a cubic grid and keep only its densest points.

    The orbital is evaluated on an ``ngrid**3`` lattice spanning
    ``[-box, box]`` along each axis.  Points whose squared amplitude lies
    strictly above the ``keep_percent`` percentile are kept; if more than
    ``max_points`` survive, a random subset of that size is drawn without
    replacement from NumPy's global random state.

    Args:
        mol: Molecule object handed to ``NumInt.eval_ao``.
        mf: Mean-field object providing ``mo_coeff`` and ``mo_occ``.
        box: Half-width of the sampling cube.
        ngrid: Number of grid points along each axis.
        keep_percent: Percentile of the squared amplitude used as cutoff.
        max_points: Upper bound on the number of returned points.

    Returns:
        Tuple ``(coords, psi)``: the retained grid points, one row per
        point, and the real part of the orbital amplitude at each of them.

    NOTE(review): grid coordinates are passed to ``eval_ao`` unchanged --
    confirm they use the same length unit as the atomic positions in ``mol``.
    """
    # One 1-D axis is shared by x, y and z, so the grid is a cube.
    axis = np.linspace(-box, box, ngrid)
    mesh = np.meshgrid(axis, axis, axis, indexing="ij")
    points = np.stack([component.ravel() for component in mesh], axis=1)

    # Atomic-orbital values at every grid point.
    ao_values = numint.NumInt().eval_ao(mol, points)

    # The HOMO is taken as the last orbital whose occupation exceeds 1e-6.
    occupied = np.where(mf.mo_occ > 1e-6)[0]
    homo_vector = mf.mo_coeff[:, occupied[-1]]

    amplitude = (ao_values @ homo_vector).real
    density = amplitude**2

    # Keep only the points above the requested density percentile.
    dense = density > np.percentile(density, keep_percent)
    points, amplitude = points[dense], amplitude[dense]

    # Thin the cloud so that plotting stays cheap.
    n_dense = points.shape[0]
    if n_dense > max_points:
        chosen = np.random.choice(n_dense, max_points, replace=False)
        points, amplitude = points[chosen], amplitude[chosen]

    return points, amplitude


def curvature_scf(symbols, coords, box=4.0, ngrid=40):
    """Run a restricted Kohn-Sham calculation (PBE / STO-3G) for the atoms.

    Args:
        symbols: Element symbol of each atom.
        coords: One ``(x, y, z)`` triple per atom, matched to ``symbols``
            by position.
        box: Unused; kept so existing callers remain valid.
        ngrid: Unused; kept so existing callers remain valid.

    Returns:
        Tuple ``(mf, mol)``: the mean-field object after ``kernel()`` has
        been called, and the built ``gto.Mole``.
    """
    # No sampling grid is needed for the SCF itself, so none is built here;
    # the former xs/ys/zs axes were never read.
    atom_str = "\n".join(
        f"{s} {x} {y} {z}" for s, (x, y, z) in zip(symbols, coords)
    )

    mol = gto.Mole()
    mol.atom = atom_str
    mol.basis = "sto-3g"
    mol.spin = 0
    mol.charge = 0
    mol.build()

    mf = dft.RKS(mol)
    mf.xc = "PBE"
    mf.max_cycle = 150
    mf.conv_tol = 1e-5
    mf.kernel()

    return mf, mol


def modeB_fe4s4(coords, symbols, amp=0.20):
    """Return a distorted copy of ``coords`` with two Fe-S contacts changed.

    Every Fe atom is paired with its nearest S atom, measured on the input
    geometry.  The first pair is lengthened by ``amp`` and the second pair
    is shortened by ``amp``; in each case both atoms move by half of the
    change along the Fe->S direction.  The input is never modified.

    Args:
        coords: ``(n_atoms, 3)`` positions; any array-like of numbers.
        symbols: Element symbol per atom, matched to ``coords`` by position.
        amp: Total change in Fe-S distance applied to each of the two pairs.

    Returns:
        A new float array of the same shape.  With fewer than two Fe atoms
        the copy is returned without any displacement.

    Raises:
        ValueError: if Fe atoms are present but there is no S atom to pair
            them with.
    """
    # Always work on a float copy: the in-place updates below would fail on
    # integer arrays, and plain nested lists do not support subtraction.
    coords = np.array(coords, dtype=float)

    fe_idx = [i for i, s in enumerate(symbols) if s == "Fe"]
    s_idx = [i for i, s in enumerate(symbols) if s == "S"]

    if fe_idx and not s_idx:
        raise ValueError("modeB_fe4s4: Fe atoms present but no S atoms to pair with")
    if len(fe_idx) < 2:
        return coords

    # Nearest S for every Fe, all measured before anything is moved.
    s_pos = coords[s_idx]
    pairs = []
    for fi in fe_idx:
        dists = np.linalg.norm(s_pos - coords[fi], axis=1)
        pairs.append((fi, s_idx[int(np.argmin(dists))]))

    def stretch(X, i_fe, i_s, delta):
        """Change the i_fe-i_s distance by ``delta``, moving both atoms."""
        v = X[i_s] - X[i_fe]
        d = np.linalg.norm(v)
        if d < 1e-6:
            # Coincident atoms define no direction; leave them alone.
            return X
        u = v / d
        X[i_fe] -= 0.5 * delta * u
        X[i_s] += 0.5 * delta * u
        return X

    # Antisymmetric distortion: first pair longer, second pair shorter.
    coords = stretch(coords, pairs[0][0], pairs[0][1], +amp)
    coords = stretch(coords, pairs[1][0], pairs[1][1], -amp)

    return coords


def homo_from_cif(cif_path, tag):
    """Plot and save the HOMO of the Fe/S atoms in a CIF, base vs. stretched.

    Every Fe and S atom of the first model is read (no residue filtering),
    the coordinates are centred on their mean, and an SCF plus HOMO cloud
    is computed for the centred geometry and for its ``modeB_fe4s4``
    distortion.  Both clouds are drawn side by side into ``HOMO_<tag>.png``,
    the SHA-512 of that file is printed, and the arrays are stored in
    ``HOMO_<tag>.npz``.

    Args:
        cif_path: Path of the CIF file to read.
        tag: Label used in the log lines, plot titles and output file names.

    Raises:
        RuntimeError: if the structure contains no Fe or S atoms.
    """
    print(f"\n=== HOMO PROCESSING {tag} from {cif_path} ===")

    # --- read the structure and collect Fe/S atoms ---
    structure = gemmi.make_structure_from_block(
        gemmi.cif.read_file(cif_path).sole_block()
    )
    first_model = structure[0]

    wanted = ("Fe", "S")
    names = []
    positions = []
    for chain in first_model:
        for residue in chain:
            for atom in residue:
                name = atom.element.name
                if name not in wanted:
                    continue
                names.append(name)
                positions.append([atom.pos.x, atom.pos.y, atom.pos.z])

    if len(names) == 0:
        raise RuntimeError("No Fe/S atoms found in CIF.")

    elems = np.array(names)
    raw_xyz = np.array(positions)
    coords_centered = raw_xyz - raw_xyz.mean(axis=0)

    # --- reference geometry ---
    mf_ref, mol_ref = curvature_scf(elems, coords_centered)
    cloud_ref, psi_ref = homo_cloud(mol_ref, mf_ref)

    # --- distorted geometry ---
    xyz_mode = modeB_fe4s4(coords_centered, elems, amp=0.20)
    mf_mode, mol_mode = curvature_scf(elems, xyz_mode)
    cloud_mode, psi_mode = homo_cloud(mol_mode, mf_mode)

    # --- side-by-side 3-D scatter, coloured by the sign of the orbital ---
    fig = plt.figure(figsize=(12, 4))
    panels = (
        (1, cloud_ref, psi_ref, ("blue", "red"), f"HOMO base — {tag}"),
        (2, cloud_mode, psi_mode, ("cyan", "magenta"), f"HOMO stretched — {tag}"),
    )
    for slot, cloud, psi, (positive, negative), title in panels:
        ax = fig.add_subplot(1, 2, slot, projection="3d")
        ax.scatter(cloud[:, 0], cloud[:, 1], cloud[:, 2],
                   s=2, c=np.where(psi >= 0, positive, negative), alpha=0.5)
        ax.set_title(title)
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_zticks([])

    plt.tight_layout()
    png = f"HOMO_{tag}.png"
    plt.savefig(png, dpi=300)
    plt.close()
    print("Saved PNG:", png)

    # Fingerprint of the image that was just written.
    hasher = hashlib.sha512()
    with open(png, "rb") as handle:
        hasher.update(handle.read())
    print("SHA512:", hasher.hexdigest())

    # Bundle the inputs and both clouds for later analysis.
    npz = f"HOMO_{tag}.npz"
    bundle = {
        "elems": elems,
        "coords_centered": coords_centered,
        "coords_cloud_base": cloud_ref,
        "psi_base": psi_ref,
        "coords_cloud_st": cloud_mode,
        "psi_st": psi_mode,
    }
    np.savez(npz, **bundle)
    print("Saved NPZ:", npz)

In [None]:
def curvature_pdf_and_mf(symbols, coords,
                         box=4.0, ngrid=40, rho_thresh=1e-3):
    """Run a restricted Kohn-Sham calculation (PBE / STO-3G) for the atoms.

    Despite the name, no density or PDF is produced here: only the SCF is
    run and its objects are returned.

    Args:
        symbols: Element symbol of each atom.
        coords: One ``(x, y, z)`` triple per atom, matched to ``symbols``
            by position.
        box: Unused; kept so existing callers remain valid.
        ngrid: Unused; kept so existing callers remain valid.
        rho_thresh: Unused; kept so existing callers remain valid.

    Returns:
        Tuple ``(mf, mol)``: the mean-field object after ``kernel()`` has
        been called, and the built ``gto.Mole``.
    """
    # The former ngrid**3 mesh was allocated on every call but never read,
    # so it is no longer built.
    atom_str = "\n".join(
        f"{s} {x} {y} {z}" for s, (x, y, z) in zip(symbols, coords)
    )

    # Spin and charge are not set here, so the Mole defaults apply.
    mol = gto.Mole()
    mol.atom = atom_str
    mol.basis = "sto-3g"
    mol.build()

    mf = dft.RKS(mol)
    mf.xc = "PBE"
    mf.conv_tol = 1e-5
    mf.max_cycle = 150
    mf.kernel()

    return mf, mol

In [None]:
def modeB_fe4s4(coords, symbols, amp=0.20):
    """Return a distorted copy of ``coords`` with two Fe-S contacts changed.

    Each Fe atom is paired with its nearest S atom, measured on the input
    geometry.  The first pair's distance is increased by ``amp`` and the
    second pair's is decreased by ``amp``, each atom taking half of the
    displacement along the Fe->S direction.  The input is left untouched.

    Args:
        coords: ``(n_atoms, 3)`` positions; any array-like of numbers.
        symbols: Element symbol per atom, matched to ``coords`` by position.
        amp: Total change in Fe-S distance applied to each of the two pairs.

    Returns:
        A new float array of the same shape; identical to the input values
        when fewer than two Fe atoms are present.

    Raises:
        ValueError: if Fe atoms are present but there is no S atom to pair
            them with.
    """
    # Float copy: integer input would break the in-place updates below and
    # nested lists do not support the vector arithmetic.
    coords = np.array(coords, dtype=float)

    fe_idx = [i for i, s in enumerate(symbols) if s == "Fe"]
    s_idx = [i for i, s in enumerate(symbols) if s == "S"]

    if fe_idx and not s_idx:
        raise ValueError("modeB_fe4s4: Fe atoms present but no S atoms to pair with")

    def nearest_s(fe):
        """Index of the S atom closest to atom ``fe``."""
        return min(s_idx, key=lambda j: np.linalg.norm(coords[j] - coords[fe]))

    # Pairing is completed before any atom is moved.
    pairs = [(fe, nearest_s(fe)) for fe in fe_idx]

    def stretch(c, fe, s, delta):
        """Change the fe-s distance by ``delta`` in place."""
        v = c[s] - c[fe]
        d = np.linalg.norm(v)
        if d < 1e-6:
            # Tolerance rather than an exact zero test: (near-)coincident
            # atoms give no usable direction.
            return c
        u = v / d
        c[fe] -= 0.5 * delta * u
        c[s] += 0.5 * delta * u
        return c

    if len(pairs) >= 2:
        # Antisymmetric distortion: first pair longer, second pair shorter.
        for (fe, s), delta in zip(pairs[:2], (+amp, -amp)):
            stretch(coords, fe, s, delta)

    return coords

In [None]:
def homo_from_cif(cif_path, tag):
    """Plot and save the HOMO of the Fe/S atoms in a CIF, base vs. stretched.

    Every Fe and S atom of the first model is read, the coordinates are
    centred on their mean, and an SCF plus HOMO cloud is computed for the
    centred geometry and for its ``modeB_fe4s4`` distortion.  Both clouds
    are drawn side by side into ``HOMO_<tag>.png``, the SHA-512 of that file
    is printed, and the arrays are stored in ``HOMO_<tag>.npz``.

    Args:
        cif_path: Path of the CIF file to read.
        tag: Label used in the log line, plot titles and output file names.

    Raises:
        RuntimeError: if fewer than four Fe atoms are found.
    """
    print(f"\n=== HOMO PROCESSING {tag} ===")

    # Load Fe/S
    doc = gemmi.cif.read_file(cif_path)
    block = doc.sole_block()
    st = gemmi.make_structure_from_block(block)
    model = st[0]

    names = []
    positions = []
    for chain in model:
        for res in chain:
            for atom in res:
                name = atom.element.name
                if name in ("Fe", "S"):
                    names.append(name)
                    positions.append([atom.pos.x, atom.pos.y, atom.pos.z])

    # Only a count check is made; no cluster is selected.
    # NOTE(review): every Fe and S atom in the model enters the calculation,
    # which may include sulfur outside the intended cluster -- confirm that
    # this is what is wanted.
    # Counting on the Python list keeps the check well-defined even when
    # nothing matched.
    if names.count("Fe") < 4:
        raise RuntimeError("No 4 Fe in CIF.")

    elems = np.array(names)
    arr = np.array(positions)

    # centre
    center = arr.mean(axis=0)
    coords_base = arr - center

    # SCF
    mf_base, mol_base = curvature_pdf_and_mf(elems, coords_base)

    # HOMO base
    coords_cloud_base, psi_base = homo_cloud(mol_base, mf_base)

    # modeB coords
    coords_st = modeB_fe4s4(coords_base, elems, amp=0.20)

    # SCF again
    mf_st, mol_st = curvature_pdf_and_mf(elems, coords_st)

    coords_cloud_st, psi_st = homo_cloud(mol_st, mf_st)

    # -----------------------------------
    # PNG OUTPUT
    # -----------------------------------
    fig = plt.figure(figsize=(16, 4))

    # Base: points coloured by the sign of the orbital
    ax1 = fig.add_subplot(1, 2, 1, projection='3d')
    ax1.scatter(coords_cloud_base[:, 0],
                coords_cloud_base[:, 1],
                coords_cloud_base[:, 2],
                s=2, c=np.where(psi_base >= 0, "blue", "red"), alpha=0.5)
    ax1.set_title(f"HOMO base — {tag}")

    # ModeB
    ax2 = fig.add_subplot(1, 2, 2, projection='3d')
    ax2.scatter(coords_cloud_st[:, 0],
                coords_cloud_st[:, 1],
                coords_cloud_st[:, 2],
                s=2, c=np.where(psi_st >= 0, "cyan", "magenta"), alpha=0.5)
    ax2.set_title(f"HOMO stretched — {tag}")

    png = f"HOMO_{tag}.png"
    plt.savefig(png, dpi=300)
    # Close this figure explicitly so repeated calls do not accumulate
    # open figures, whatever the current-figure state is.
    plt.close(fig)
    print("Saved PNG:", png)

    # SHA512 -- read through a context manager so the handle is closed.
    with open(png, "rb") as f:
        h = hashlib.sha512(f.read()).hexdigest()
    print("SHA512:", h)

    # NPZ bundle
    npz = f"HOMO_{tag}.npz"
    np.savez(npz,
             coords_base=coords_cloud_base,
             psi_base=psi_base,
             coords_mode=coords_cloud_st,
             psi_mode=psi_st,
             elems=elems)
    print("Saved NPZ:", npz)

In [None]:
# Entries to process: each tag is paired with the relative path "<tag>.cif".
targets = [
    (code, f"{code}.cif")
    for code in (
        "4S38", "4S39", "4S3A", "4S3B",
        "4S3C", "4S3D", "4S3E", "4S3F",
    )
]

# Best-effort batch: a failing entry is reported and the loop carries on.
for tag, cif in targets:
    try:
        homo_from_cif(cif, tag)
    except Exception as e:
        print("ERROR:", tag, e)


=== HOMO PROCESSING 4S38 ===
