In [2]:
from pathlib import Path
from Bio.PDB import MMCIFParser, Superimposer, is_aa, PDBParser

# Input structure files, given relative to the current working directory.
# Exactly one ref_path and one pred_path assignment should be active; the
# commented-out lines are alternative inputs that can be swapped in.
ref_path  = Path("./data/ground_truth/6xr6.cif")                # ground-truth structure (mmCIF)
# ref_path  = Path("./data/ground_truth/6xrg.cif")                # ground-truth structure (mmCIF)
pred_path = Path("./data/predicted/ESM_original/esm3_abl1b_complete_229-515.pdb")  # ESM-3 prediction (PDB)
# pred_path = Path("./data/predicted/esm3_abl1b_complete_partialdiffusion_0_229-515.pdb")  # ESM-3 prediction (PDB)
# pred_path = Path("./data/predicted/esm3_abl1b_complete_partialdiffusion_t10_0_229-515.pdb")  # ESM-3 prediction (PDB)

def ca_dict(structure, chain_id="A"):
    """
    Map residue sequence numbers to Cα atoms for one chain of *structure*.

    Only ``structure[0]`` (the first model) is inspected. A residue is
    included when ``is_aa(res, standard=True)`` accepts it and it contains
    a "CA" atom; everything else in the chain is skipped.

    Keys are taken from the second field of ``res.get_id()`` alone, so if
    two residues in the chain share that number, the one iterated later
    replaces the earlier one.

    Raises ``KeyError`` if the model or *chain_id* is not present.
    """
    first_model = structure[0]
    selected_chain = first_model[chain_id]

    atoms_by_number = {}
    for residue in selected_chain:
        # Skip anything that is not a standard amino acid.
        if not is_aa(residue, standard=True):
            continue
        # Skip residues with no alpha carbon.
        if "CA" not in residue:
            continue
        atoms_by_number[residue.get_id()[1]] = residue["CA"]
    return atoms_by_number

# ---- parse both structures ----
# The reference is an mmCIF file and the prediction is a PDB file, so each
# needs its own parser. QUIET=True silences parser warnings.
parser = MMCIFParser(QUIET=True)
PDBparser = PDBParser(QUIET=True)
ref_struct = parser.get_structure("ref", ref_path)
pred_struct = PDBparser.get_structure("pred", pred_path)

# ---- chains to compare (edit if the prediction uses a different chain ID) ----
chain_ref = "A"
chain_pred = "A"

# Cα atoms of each chain, keyed by residue number.
ref_ca = ca_dict(ref_struct, chain_ref)
pred_ca = ca_dict(pred_struct, chain_pred)

# Residues are paired purely by number: keep the numbers present in both
# chains, in ascending order, so the two atom lists line up one-to-one.
common_ids = sorted(resnum for resnum in ref_ca if resnum in pred_ca)
print(f"Common residues: {len(common_ids)}")

if not len(common_ids) >= 3:
    raise ValueError("Too few overlapping residues to superimpose.")

ref_atoms = list(map(ref_ca.__getitem__, common_ids))
pred_atoms = list(map(pred_ca.__getitem__, common_ids))

# Superimposer.set_atoms(fixed, moving): the reference is the fixed set and
# the prediction is the moving set. The fit is computed here and its RMSD is
# exposed as sup.rms; no coordinates change unless sup.apply() is called.
sup = Superimposer()
sup.set_atoms(ref_atoms, pred_atoms)

print(f"Backbone Cα RMSD over {len(common_ids)} residues = {sup.rms:.3f} Å")

# Optional: move the whole prediction onto the reference frame
# sup.apply(pred_struct.get_atoms())

# Optional: write the transformed prediction for inspection in PyMOL / ChimeraX
# from Bio.PDB import PDBIO
# io = PDBIO()
# io.set_structure(pred_struct)
# io.save("esm3_aligned_to_6xr6.pdb")



Common residues: 268
Backbone Cα RMSD over 268 residues = 2.655 Å
