In [5]:
import re
from openmm.app import PDBFile

def find_cross_chain_bonds(pdf_file):
    """Find bonds between atoms in different chains of a PDB structure.
    
    Parameters
    ----------
    pdf_file : str
        Path to the PDB file to analyze
        
    Returns
    -------
    list
        List of bonds that connect atoms in different chains
    """
    pdf = PDBFile(pdf_file)
    cross_chain_bonds = []
    
    for bond in pdf.topology.bonds():
        str_atom1 = str(bond.atom1)
        str_atom2 = str(bond.atom2)
        # get the chain number using regex <Atom 6 (C) of chain 0 residue 0 (THR)> <Atom 4 (CA) of chain 0 residue 0 (THR)>
        chain_atom1 = re.search(r'chain (\d+)', str_atom1).group(1)
        chain_atom2 = re.search(r'chain (\d+)', str_atom2).group(1)
        if chain_atom1 != chain_atom2:
            cross_chain_bonds.append(bond)

    return cross_chain_bonds

cross_chain_bonds = find_cross_chain_bonds('CD28_general.pdb')
print(f'Found {len(cross_chain_bonds)} cross-chain bonds in CD28_general.pdb')
print(cross_chain_bonds)

cross_chain_bonds = find_cross_chain_bonds('CD28_general_fixed.pdb')
print(f'Found {len(cross_chain_bonds)} cross-chain bonds in CD28_general_fixed.pdb')
print(cross_chain_bonds[0])

cross_chain_bonds = find_cross_chain_bonds('CD28_general_solvated.pdb')
print(f'Found {len(cross_chain_bonds)} cross-chain bonds in CD28_general_solvated.pdb')
print(cross_chain_bonds[0])



Found 0 cross-chain bonds in CD28_general.pdb
[]
Found 0 cross-chain bonds in CD28_general_fixed.pdb


IndexError: list index out of range

In [38]:
original_pdbs = [
    '../../data/241010_FoldingUponBinding/input/A-synuclein/A-synuclein_alpha.pdb',
    '../../data/241010_FoldingUponBinding/input/A-synuclein/A-synuclein_general.pdb',
    '../../data/241010_FoldingUponBinding/input/CD28/CD28_alpha.pdb',
    '../../data/241010_FoldingUponBinding/input/CD28/CD28_beta.pdb',
    '../../data/241010_FoldingUponBinding/input/CD28/CD28_general.pdb',
    '../../data/241010_FoldingUponBinding/input/CD28/CD28_partial.pdb',
    '../../data/241010_FoldingUponBinding/input/p53/p53_1.pdb',
    '../../data/241010_FoldingUponBinding/input/p53/p53_2.pdb',
    '../../data/241010_FoldingUponBinding/input/p53/p53_end.pdb',
    '../../data/241010_FoldingUponBinding/input/SUMO/sumo1.pdb',
    '../../data/241010_FoldingUponBinding/input/SUMO/sumo1c.pdb',
]

for pdb_file in original_pdbs:
    cross_chain_bonds = find_cross_chain_bonds(pdb_file)
    print(f'Found {len(cross_chain_bonds)} cross-chain bonds in {pdb_file}')

Found 0 cross-chain bonds in ../../data/241010_FoldingUponBinding/input/A-synuclein/A-synuclein_alpha.pdb
Found 0 cross-chain bonds in ../../data/241010_FoldingUponBinding/input/A-synuclein/A-synuclein_general.pdb
Found 0 cross-chain bonds in ../../data/241010_FoldingUponBinding/input/CD28/CD28_alpha.pdb
Found 0 cross-chain bonds in ../../data/241010_FoldingUponBinding/input/CD28/CD28_beta.pdb
Found 0 cross-chain bonds in ../../data/241010_FoldingUponBinding/input/CD28/CD28_general.pdb
Found 0 cross-chain bonds in ../../data/241010_FoldingUponBinding/input/CD28/CD28_partial.pdb
Found 0 cross-chain bonds in ../../data/241010_FoldingUponBinding/input/p53/p53_1.pdb
Found 0 cross-chain bonds in ../../data/241010_FoldingUponBinding/input/p53/p53_2.pdb
Found 0 cross-chain bonds in ../../data/241010_FoldingUponBinding/input/p53/p53_end.pdb
Found 0 cross-chain bonds in ../../data/241010_FoldingUponBinding/input/SUMO/sumo1.pdb
Found 0 cross-chain bonds in ../../data/241010_FoldingUponBinding/in

In [42]:
fixed_pdbs = [
    '../../data/241010_FoldingUponBinding/output/241029/A-synuclein/alpha_1/A-synuclein_alpha_fixed.pdb',
    '../../data/241010_FoldingUponBinding/output/241029/A-synuclein/general_1/A-synuclein_general_fixed.pdb',
    '../../data/241010_FoldingUponBinding/output/241029/CD28/alpha_1/CD28_alpha_fixed.pdb',
    '../../data/241010_FoldingUponBinding/output/241029/CD28/beta_1/CD28_beta_fixed.pdb',
    '../../data/241010_FoldingUponBinding/output/241029/CD28/general_1/CD28_general_fixed.pdb',
    '../../data/241010_FoldingUponBinding/output/241029/CD28/partial_1/CD28_partial_fixed.pdb',
    '../../data/241010_FoldingUponBinding/output/241029/p53/1_1/p53_1_fixed.pdb',
    '../../data/241010_FoldingUponBinding/output/241029/p53/2_1/p53_2_fixed.pdb',
    '../../data/241010_FoldingUponBinding/output/241029/p53/end_1/p53_end_fixed.pdb',
    '../../data/241010_FoldingUponBinding/output/241029/sumo/1_1/sumo_1_fixed.pdb',
    '../../data/241010_FoldingUponBinding/output/241029/sumo/1c_1/sumo_1c_fixed.pdb',
]
for pdb_file in fixed_pdbs:
    cross_chain_bonds = find_cross_chain_bonds(pdb_file)
    print(f'Found {len(cross_chain_bonds)} cross-chain bonds in {pdb_file}')


Found 0 cross-chain bonds in ../../data/241010_FoldingUponBinding/output/241029/A-synuclein/alpha_1/A-synuclein_alpha_fixed.pdb
Found 0 cross-chain bonds in ../../data/241010_FoldingUponBinding/output/241029/A-synuclein/general_1/A-synuclein_general_fixed.pdb
Found 1 cross-chain bonds in ../../data/241010_FoldingUponBinding/output/241029/CD28/alpha_1/CD28_alpha_fixed.pdb
Found 1 cross-chain bonds in ../../data/241010_FoldingUponBinding/output/241029/CD28/beta_1/CD28_beta_fixed.pdb
Found 1 cross-chain bonds in ../../data/241010_FoldingUponBinding/output/241029/CD28/general_1/CD28_general_fixed.pdb
Found 1 cross-chain bonds in ../../data/241010_FoldingUponBinding/output/241029/CD28/partial_1/CD28_partial_fixed.pdb
Found 0 cross-chain bonds in ../../data/241010_FoldingUponBinding/output/241029/p53/1_1/p53_1_fixed.pdb
Found 0 cross-chain bonds in ../../data/241010_FoldingUponBinding/output/241029/p53/2_1/p53_2_fixed.pdb
Found 0 cross-chain bonds in ../../data/241010_FoldingUponBinding/outp

In [3]:
from src.fixer import fixer

original_pdbs = [
    '../../data/241010_FoldingUponBinding/input/A-synuclein/A-synuclein_alpha.pdb',
    '../../data/241010_FoldingUponBinding/input/A-synuclein/A-synuclein_general.pdb',
    '../../data/241010_FoldingUponBinding/input/CD28/CD28_alpha.pdb',
    '../../data/241010_FoldingUponBinding/input/CD28/CD28_beta.pdb',
    '../../data/241010_FoldingUponBinding/input/CD28/CD28_general.pdb',
    '../../data/241010_FoldingUponBinding/input/CD28/CD28_partial.pdb',
    '../../data/241010_FoldingUponBinding/input/p53/p53_1.pdb',
    '../../data/241010_FoldingUponBinding/input/p53/p53_2.pdb',
    '../../data/241010_FoldingUponBinding/input/p53/p53_end.pdb',
    '../../data/241010_FoldingUponBinding/input/SUMO/sumo1.pdb',
    '../../data/241010_FoldingUponBinding/input/SUMO/sumo1c.pdb',
]

split_chains = [False, False, False, False, False, False, True, True, True, True, True]

for pdb_file, split_chain in zip(original_pdbs, split_chains):
    fixer(pdb_file, './', split_chains=split_chain)



In [7]:

fixed_pdbs = [
    'A-synuclein_alpha_fixed.pdb',
    'A-synuclein_general_fixed.pdb',
    'CD28_alpha_fixed.pdb',
    'CD28_beta_fixed.pdb',
    'CD28_general_fixed.pdb',
    'CD28_partial_fixed.pdb',
    'p53_1_fixed.pdb',
    'p53_2_fixed.pdb',
    'p53_end_fixed.pdb',
    'sumo1_fixed.pdb',
    'sumo1c_fixed.pdb',
]
for pdb_file in fixed_pdbs:
    cross_chain_bonds = find_cross_chain_bonds(pdb_file)
    print(f'Found {len(cross_chain_bonds)} cross-chain bonds in {pdb_file}')


Found 0 cross-chain bonds in A-synuclein_alpha_fixed.pdb
Found 0 cross-chain bonds in A-synuclein_general_fixed.pdb
Found 0 cross-chain bonds in CD28_alpha_fixed.pdb
Found 0 cross-chain bonds in CD28_beta_fixed.pdb
Found 0 cross-chain bonds in CD28_general_fixed.pdb
Found 0 cross-chain bonds in CD28_partial_fixed.pdb
Found 0 cross-chain bonds in p53_1_fixed.pdb
Found 0 cross-chain bonds in p53_2_fixed.pdb
Found 0 cross-chain bonds in p53_end_fixed.pdb
Found 0 cross-chain bonds in sumo1_fixed.pdb
Found 0 cross-chain bonds in sumo1c_fixed.pdb
