In [8]:
from Bio.SVDSuperimposer import SVDSuperimposer
from Bio.PDB import PDBParser
import numpy as np
from tqdm import tqdm
from pathlib import Path
import pandas as pd
# Named groups of atom names used to pick coordinates out of residues;
# "backbone" lists the four main-chain atom names.
atoms_list = dict(backbone=["N", "CA", "C", "O"])

def _paired_backbone_coords(res_nat, res_mod):
    """Return backbone coordinates present in BOTH residues, paired by atom name.

    The native residue's atoms are walked in file order and each backbone atom
    is looked up by name in the model residue. An atom that is missing from,
    or ordered differently in, one of the two structures therefore cannot
    shift the two coordinate lists out of register.

    Returns:
        (coords_native, coords_model): two equally long lists of [x, y, z] lists.
    """
    backbone = atoms_list['backbone']
    model_atoms = {
        atom.get_id(): atom
        for atom in res_mod.get_atoms()
        if atom.get_id() in backbone
    }
    coords_nat, coords_mod = [], []
    for atom in res_nat.get_atoms():
        name = atom.get_id()
        if name in backbone and name in model_atoms:
            coords_nat.append(list(atom.coord))
            coords_mod.append(list(model_atoms[name].coord))
    return coords_nat, coords_mod


def _superimposed_rmsd(coords_native, coords_model, label):
    """Superimpose model coordinates onto native ones and return the RMSD.

    Raises:
        ValueError: if there are no coordinates to superimpose.
    """
    if not coords_native:
        # Without this guard SVDSuperimposer fails on the empty array with an
        # unhelpful indexing error.
        raise ValueError(f'No paired backbone atoms available for {label}.')
    svd = SVDSuperimposer()
    svd.set(np.array(coords_native), np.array(coords_model))
    svd.run()
    return svd.get_rms()


def get_antigen_rmsd(pdb_code, benchmark_folder: Path):
    """Compute backbone RMSD between a native antigen and its AF2 model.

    Reads three files from ``benchmark_folder/<pdb_code>/``:
    ``<pdb_code>_antigen.pdb`` (native), ``<pdb_code>_AF2_antigen_model.pdb``
    (model) and ``<pdb_code>_constraint_pairs.txt`` (epitope definition).
    Residues of the two structures are paired positionally (first with first,
    and so on); within each pair, backbone atoms are matched by name.

    Args:
        pdb_code: Name of the benchmark sub-directory / file prefix.
        benchmark_folder: Root folder containing one sub-directory per case.

    Returns:
        dict with keys ``'pdb'``, ``'rmsd'`` (all paired backbone atoms) and
        ``'rmsd_epitope'`` (paired backbone atoms of epitope residues only),
        each RMSD computed after its own optimal superposition.

    Raises:
        ValueError: if no backbone atoms (overall or in the epitope) could be
            paired. File and parsing errors propagate unchanged.
    """
    import warnings  # local import: only needed for the residue-count check

    parser = PDBParser()
    case_dir = benchmark_folder/f'{pdb_code}'

    # Parse each file once and derive residues/chains from the same object.
    native_structure = parser.get_structure('native', case_dir/f'{pdb_code}_antigen.pdb')
    model_structure = parser.get_structure('model', case_dir/f'{pdb_code}_AF2_antigen_model.pdb')

    if pdb_code == '7k7h':
        # NOTE(review): for this entry only the first two native chains are
        # compared; the reason is not recorded here — presumably the native
        # file carries chains that the model lacks. Confirm.
        native_chains = list(native_structure.get_chains())
        native_residues = list(native_chains[0].get_residues()) + list(native_chains[1].get_residues())
    else:
        native_residues = list(native_structure.get_residues())
    model_residues = list(model_structure.get_residues())

    if len(native_residues) != len(model_residues):
        # zip() below stops at the shorter list; make that visible instead of
        # silently comparing a truncated (and possibly shifted) residue set.
        warnings.warn(
            f'{pdb_code}: native has {len(native_residues)} residues but model has '
            f'{len(model_residues)}; only the first '
            f'{min(len(native_residues), len(model_residues))} positional pairs are compared.',
            stacklevel=2,
        )

    # Epitope residues are identified by "<field0><field1>" of the part after
    # the first ':' on each data line, and compared below against chain id +
    # residue number. Assumes lines look like "<...>:<chain>,<resnum>,..." —
    # TODO confirm against the constraint file spec.
    epitope_def_nums = set()
    with (case_dir/f'{pdb_code}_constraint_pairs.txt').open() as inf:
        next(inf, None)  # skip the header line
        for line in inf:
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            fields = line.split(':')[1].strip().split(',')
            epitope_def_nums.add(f'{fields[0]}{fields[1]}')

    atom_coords_native = []
    atom_coords_model = []
    atom_coords_native_epitope = []
    atom_coords_model_epitope = []

    for res_nat, res_mod in zip(native_residues, model_residues):
        atoms_nat, atoms_mod = _paired_backbone_coords(res_nat, res_mod)

        atom_coords_native += atoms_nat
        atom_coords_model += atoms_mod

        # Biopython full id: (structure, model, chain, (hetflag, resseq, icode)).
        full_id = res_nat.get_full_id()
        if f'{full_id[2]}{full_id[3][1]}' in epitope_def_nums:
            atom_coords_native_epitope += atoms_nat
            atom_coords_model_epitope += atoms_mod

    rmsd = _superimposed_rmsd(
        atom_coords_native, atom_coords_model, f'{pdb_code} (all residues)')
    rmsd_epi = _superimposed_rmsd(
        atom_coords_native_epitope, atom_coords_model_epitope, f'{pdb_code} (epitope residues)')

    return {'pdb': pdb_code, 'rmsd': rmsd, 'rmsd_epitope': rmsd_epi}

In [9]:
# Root of the benchmark; each sub-directory name is used as a PDB code.
benchmark_folder = Path(
    '/Users/dcutting/Library/CloudStorage/Box-Box/Exscientia - Bonvin Lab share/benchmark_haddock_23_May_2023')
records = []

# The progress bar counts every directory entry; plain files are skipped.
for path in tqdm(list(benchmark_folder.iterdir())):
    if not path.is_dir():
        continue
    pdb_code = path.name
    try:
        records.append(get_antigen_rmsd(pdb_code, benchmark_folder))
    except Exception as e:
        # Best effort: report the failing case and keep going with the rest.
        print(f"Got error {e} for {pdb_code=}.")


100%|██████████| 83/83 [00:12<00:00,  6.82it/s]


In [10]:
# Assemble the per-structure result dicts into one table
# (columns: pdb, rmsd, rmsd_epitope).
epitope_rmsd_df = pd.DataFrame.from_records(records)


In [13]:
# Plain-text preview of the results table (pandas truncates the middle rows).
print(epitope_rmsd_df)

     pdb      rmsd  rmsd_epitope
0   7q0g  0.705099      1.143558
1   7q0i  2.453988      4.086787
2   7pi7  1.421933      2.482512
3   7f7e  1.037556      0.423620
4   7n4i  1.075521      0.971252
..   ...       ...           ...
76  7kf0  0.884482      0.360559
77  7kf1  0.879388      0.370756
78  7phu  1.232355      2.059458
79  6xsw  1.305173      1.450052
80  7s13  1.735253      0.948656

[81 rows x 3 columns]


In [12]:
# Save the table to CSV, at a path relative to the working directory.
# The DataFrame index is written too, as the first (unnamed) column, since index=False is not passed.
epitope_rmsd_df.to_csv('../data/AF2_antigen_rmsd.csv')