## Define receptor binding pocket contact sites

In [1]:
import numpy as np
import pandas as pd
from Bio import PDB

In [4]:
# Load the structure from disk; QUIET=True silences parser warnings.
parser = PDB.PDBParser(QUIET=True)
structure = parser.get_structure("HA", "structures/pdbs/2yp4.pdb1")

# Two pools of atom records, filled from every model and chain in the file:
#   standard_atoms - atoms of residues whose name is a standard amino acid
#   sia_atoms      - atoms of residues named "SIA" (presumably the sialic
#                    acid ligand; confirm against the PDB entry)
standard_atoms = []
sia_atoms = []

amino_acid_names = PDB.Polypeptide.standard_aa_names

for model in structure:
    for chain in model:
        for res in chain:
            res_name = res.resname

            # Choose the pool this residue's atoms go to; residues that are
            # neither SIA nor a standard amino acid are ignored.
            if res_name == "SIA":
                pool = sia_atoms
            elif res_name in amino_acid_names:
                pool = standard_atoms
            else:
                continue

            res_number = res.id[1]
            pool.extend(
                {
                    "chain": chain.id,
                    "residue": res_name,
                    "residue_number": res_number,
                    "atom": atom.name,
                    "coord": atom.coord,
                }
                for atom in res
            )

# One row per (standard atom, SIA atom) pair holding their Euclidean distance.
# Pairs are formed across the pooled lists, i.e. regardless of which model or
# chain each atom came from.
distance_list = [
    [
        protein_atom["chain"],
        protein_atom["residue"],
        protein_atom["residue_number"],
        protein_atom["atom"],
        ligand_atom["residue"],
        ligand_atom["residue_number"],
        ligand_atom["atom"],
        np.linalg.norm(protein_atom["coord"] - ligand_atom["coord"]),
    ]
    for protein_atom in standard_atoms
    for ligand_atom in sia_atoms
]

# Tabulate the pairs; the first four columns describe the standard-residue
# atom, the next three the SIA atom.
distance_columns = [
    "chain", "residue", "residue_number", "atom",
    "sia_residue", "sia_residue_number", "sia_atom", "distance",
]
df_distances = pd.DataFrame(distance_list, columns=distance_columns)

df_distances.head()

Unnamed: 0,chain,residue,residue_number,atom,sia_residue,sia_residue_number,sia_atom,distance
0,A,ASN,8,N,SIA,5,C1,109.690216
1,A,ASN,8,N,SIA,5,C2,111.161263
2,A,ASN,8,N,SIA,5,C3,112.014168
3,A,ASN,8,N,SIA,5,C4,111.719551
4,A,ASN,8,N,SIA,5,C5,111.961769


In [6]:
# Unique residue numbers that have at least one atom pair with a distance
# below 4 (same units as the structure coordinates, presumably angstroms).
df_distances.loc[df_distances['distance'] < 4, 'residue_number'].unique()

array([ 98, 134, 135, 136, 137, 145, 153, 194, 226, 228])