In [2]:
#this is a second implementation that will just brute force all interactions between every chain.
#it will be necessary to prune interactions between antibodies and nanobodies.

from Bio.PDB import *
from Bio.PDB.PDBExceptions import PDBConstructionWarning
import warnings
import csv
import os

In [3]:
#Computes Binding Region Residues

def getBindingRegionResidues(chain1, chain2):
    # Convert the chain objects to lists of atoms
    atoms1 = list(chain1.get_atoms())
    atoms2 = list(chain2.get_atoms())
    
    # Create a NeighborSearch object for chain1
    ns1 = NeighborSearch(atoms1)
    
    # Find the residues in chain2 that are within 4 angstroms of chain1
    nearby_residues2 = []
    residue = None
    neighbors = None
    for atom in atoms2:
        neighbors = ns1.search(atom.coord, 4, level = 'A')
        for neighbor in neighbors:
            residue = neighbor.get_parent()
            if residue not in nearby_residues2:
                nearby_residues2.append(residue)
    
    # Create a NeighborSearch object for chain2
    ns2 = NeighborSearch(atoms2)

    # Find the residues in chain1 that are within 4 angstroms of chain2
    nearby_residues1 = []
    residue = None
    neighbors = None
    for atom in atoms1:
        neighbors = ns2.search(atom.coord, 4, level = 'A')
        for neighbor in neighbors:
            residue = neighbor.get_parent()
            if residue not in nearby_residues1:
                nearby_residues1.append(residue)

    #nearby_residues are on opposite chains
    output = {chain1.get_id():nearby_residues2, 
              chain2.get_id():nearby_residues1}
    
    return output

In [6]:
def getEpitopeParatopeInteractions(pdbFile, outputFile = None, isNb = False):
    warnings.simplefilter('ignore', PDBConstructionWarning)
    parser = PDBParser()
    structure = parser.get_structure(id = pdbFile, file = pdbFile)

    print(structure.header['compound'])

    # Calculate all chain interactions in the file also store chains for later
    pairs = []
    allChains = []
    for model in structure:
        for chain in model:
            allChains.append(chain.get_id())
            for pair in model:
                if chain.get_id() == pair.get_id():
                    continue
                else:
                    pairs.append([
                        chain, pair
                    ])

    ResidueInContact = []
    for pair in pairs:
        ResidueInContact.append(getBindingRegionResidues(pair[0], pair[1]))
    
    # Remove empty entries
    ResidueInContact = [item for item in ResidueInContact if len(item[list(item.keys())[0]]) != 0]

    # Remove intermolecular chain interactions
    compounds = structure.header['compound']
    chains = {k: v['chain'].replace(',', '').upper().split() for k, v in compounds.items()}
    InterChain = []
    for k, v in chains.items():
        for chain in v:
            for copy in v:
                if copy == chain:
                    continue
                else:
                    InterChain.append([
                        chain, copy
                    ])
    ResidueInContact = [item for item in ResidueInContact if list(item.keys()) not in InterChain]

    # Create csv file

    # Consolidate Binding residue sequence IDs with their chain.
    BindingDict = {}
    for pair in ResidueInContact:
        for k, v in pair.items():
            if k not in list(BindingDict.keys()):
                BindingDict[k] = []
            for residue in v:
                BindingDict[k].append(residue.get_id()[1])

    # Check to see if all chains are in BindingDict to avoid KeyError later on. Create an empty entry as that chain has no binding region.
    for chain in allChains:
        if chain not in list(BindingDict.keys()):
            BindingDict[chain] = []

    ResolvedSeq = {}
    for model in structure:
        for chain in model:
            ResolvedSeq[chain.get_id()] = []
            for residue in chain:
                ResolvedSeq[chain.get_id()].append(residue)

    data = []
    for chain, residues in ResolvedSeq.items():
        for residue in residues:
            label = 1 if residue.get_id()[1] in BindingDict[chain] else 0
            data.append([
                chain, residue.get_id()[1], residue.get_resname(), label
            ])
    
    # Remove water
    data = [row for row in data if row[2] != 'HOH']

    # Write file
    if outputFile != None:
        fields = ['chain', 'AA#', 'AA', 'Binding']
        if isNb:
            path = os.path.join('Data/Antibody', outputFile)
        else:
            path = os.path.join('Data/Nanobody', outputFile)
        with open(path, 'w', newline = '') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(fields)
            csvwriter.writerows(data)

    return data
    

In [8]:
from SAbDab_downloader import *
getEpitopeParatopeInteractions(pdbFile = 'pdbFiles/8dt8.pdb')



{'1': {'misc': '', 'molecule': 'spike glycoprotein', 'chain': 'a, c, b', 'synonym': 's glycoprotein,e2,peplomer protein', 'engineered': 'yes', 'mutation': 'yes'}, '2': {'misc': '', 'molecule': 'lm18 nanobody', 'chain': 'h', 'engineered': 'yes'}, '3': {'misc': '', 'molecule': 'nb136 nanobody', 'chain': 'd', 'engineered': 'yes'}}


[['A', 27, 'ALA', 0],
 ['A', 28, 'TYR', 0],
 ['A', 29, 'THR', 0],
 ['A', 30, 'ASN', 0],
 ['A', 31, 'SER', 0],
 ['A', 32, 'PHE', 0],
 ['A', 33, 'THR', 0],
 ['A', 34, 'ARG', 0],
 ['A', 35, 'GLY', 0],
 ['A', 36, 'VAL', 0],
 ['A', 37, 'TYR', 0],
 ['A', 38, 'TYR', 0],
 ['A', 39, 'PRO', 0],
 ['A', 40, 'ASP', 0],
 ['A', 41, 'LYS', 0],
 ['A', 42, 'VAL', 0],
 ['A', 43, 'PHE', 0],
 ['A', 44, 'ARG', 0],
 ['A', 45, 'SER', 0],
 ['A', 46, 'SER', 0],
 ['A', 47, 'VAL', 0],
 ['A', 48, 'LEU', 0],
 ['A', 49, 'HIS', 0],
 ['A', 50, 'SER', 0],
 ['A', 51, 'THR', 0],
 ['A', 52, 'GLN', 0],
 ['A', 53, 'ASP', 0],
 ['A', 54, 'LEU', 0],
 ['A', 55, 'PHE', 0],
 ['A', 56, 'LEU', 0],
 ['A', 57, 'PRO', 0],
 ['A', 58, 'PHE', 0],
 ['A', 59, 'PHE', 0],
 ['A', 60, 'SER', 0],
 ['A', 61, 'ASN', 0],
 ['A', 62, 'VAL', 0],
 ['A', 63, 'THR', 0],
 ['A', 64, 'TRP', 0],
 ['A', 65, 'PHE', 0],
 ['A', 66, 'HIS', 0],
 ['A', 79, 'PHE', 0],
 ['A', 80, 'ASP', 0],
 ['A', 81, 'ASN', 0],
 ['A', 82, 'PRO', 0],
 ['A', 83, 'VAL', 0],
 ['A', 84,

In [16]:
getpdb(pdb_entry='8dt8', out_path = 'pdbFiles')

True