In [1]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from Bio.PDB import PDBParser, MMCIFParser, Superimposer
from Bio.SVDSuperimposer import SVDSuperimposer
from Bio.SeqUtils import seq1
from Bio import pairwise2
import numpy as np



In [3]:
def superimpose_and_plot(cif_file, pdb_file):
    """Superimpose a model onto a native structure and report the CA RMSD.

    The native structure is read from ``pdb_file`` (PDB format) and the model
    from ``cif_file`` (mmCIF format). Their standard-residue sequences are
    globally aligned, the CA atoms of every aligned (non-gap) residue pair are
    collected, and the model coordinates are fitted onto the native ones with
    an SVD superposition.

    Note: despite its name, this function does not draw any plot; it prints
    the RMSD and returns it.

    Args:
        cif_file: Path to the mmCIF file holding the model.
        pdb_file: Path to the PDB file holding the native structure.

    Returns:
        The RMSD (in the coordinate units of the input files) after
        superposition of the paired CA atoms.

    Raises:
        ValueError: If either structure has no standard residues, or if no
            aligned residue pair has a CA atom in both structures.
    """

    def get_residues(structure):
        """Return the standard (non-hetero) residues of ``structure`` in file order."""
        return [
            residue
            for mdl in structure
            for chain in mdl
            for residue in chain
            if residue.id[0] == ' '
        ]

    def get_sequence(residues):
        """Return a one-letter sequence with exactly one character per residue."""
        letters = []
        for residue in residues:
            letter = seq1(residue.get_resname())
            # seq1 consumes its input in three-character chunks, so a residue
            # name of any other length would not map to exactly one letter and
            # would shift the sequence-to-residue index mapping used below.
            letters.append(letter if len(letter) == 1 else 'X')
        return "".join(letters)

    # Parse both structures
    native = PDBParser(QUIET=True).get_structure("native", pdb_file)
    model = MMCIFParser(QUIET=True).get_structure("model", cif_file)

    # Build the residue lists once and derive the sequences from them, so that
    # position i in a sequence always refers to residue i in the matching list.
    native_residues = get_residues(native)
    model_residues = get_residues(model)
    native_seq = get_sequence(native_residues)
    model_seq = get_sequence(model_residues)

    if not native_seq or not model_seq:
        raise ValueError(
            "Both structures must contain at least one standard residue "
            f"(native: {len(native_seq)}, model: {len(model_seq)})."
        )

    # Perform global alignment and keep the first reported alignment
    best_alignment = pairwise2.align.globalxx(native_seq, model_seq)[0]
    aligned_native = best_alignment[0]
    aligned_model = best_alignment[1]

    # Walk the alignment columns and collect CA coordinates of paired residues
    native_coords = []
    model_coords = []
    native_index, model_index = 0, 0
    for res_native, res_model in zip(aligned_native, aligned_model):
        if res_native != '-' and res_model != '-':
            native_residue = native_residues[native_index]
            model_residue = model_residues[model_index]
            # Skip pairs where either residue lacks a CA atom instead of
            # failing with a KeyError.
            if 'CA' in native_residue and 'CA' in model_residue:
                native_coords.append(native_residue['CA'].get_coord())
                model_coords.append(model_residue['CA'].get_coord())
        if res_native != '-':
            native_index += 1
        if res_model != '-':
            model_index += 1

    if not native_coords:
        raise ValueError("No aligned residue pair has a CA atom in both structures.")

    # SVDSuperimposer needs N x 3 coordinate arrays, not Atom objects
    native_coords = np.array(native_coords)
    model_coords = np.array(model_coords)

    # Superimpose the model onto the native (reference) coordinates using SVD
    super_imposer = SVDSuperimposer()
    super_imposer.set(native_coords, model_coords)
    super_imposer.run()

    # Extract RMSD
    rmsd = super_imposer.get_rms()
    print(f"RMSD: {rmsd:.3f} Å")

    return rmsd

In [4]:
# Input structures, given as paths relative to the notebook's working directory.
pdb_file = "af3_predictions/fold_t1124_new/7ux8.pdb"  # native structure (PDB format)
cif_file = "af3_predictions/fold_t1124_new/fold_t1124_new_model_0.cif"  # model (mmCIF format)

# Both files must exist on disk; the parsers raise FileNotFoundError otherwise.
superimpose_and_plot(cif_file=cif_file, pdb_file=pdb_file)

FileNotFoundError: [Errno 2] No such file or directory: 'af3_predictions/fold_t1124_new/7ux8.pdb'