# Identify and Annotate Residues Near Antibody Binding Sites

Aim: annotate antigen residues within 4 angstroms of binding sites, compare results with paper

## Dependencies

In [1]:
import pandas as pd
import numpy as np
import Bio.PDB

## Input information

In [2]:
pdbids = pd.read_csv("./additional_data/pdb_identifiers.csv")
pdbids

Unnamed: 0,Antibody,Short_Name,PDB_id,Primary_Chains,Secondary_Chains
0,Casirivimab,CoV_binder_1,6XDG,e,bd
1,Imdevimab,CoV_binder_2,7ZJL,abc,ghijkl
2,Bamlanivimab,CoV_binder_3,7KMG,cf,abde
3,Regdanvimab,CoV_binder_4,7CM4,a,hi
4,Tixagevimab,CoV_binder_6,7L7D,e,hl
5,Cilgavimab,CoV_binder_7,8SUO,a,im
6,Etesevimab,CoV_binder_8,7F7E,e,cl
7,COR-101,CoV_binder_9,7B3O,e,hl
8,CC12.1,CoV_binder_10,8CWV,a,hl
9,Fab-52,CoV_binder_11,7K9Z,e,hl


## Functions used for analysis

In [3]:
def load_pdb_file(pdb_id, pdb_file):
    """
    Load a PDB file to return the PDB structure.

    Args:
        pdb_id: character string identifying the PDB structure.
        pdb_file: character string giving the file path to the PDB file.

    Returns:
        PDB structure.
    """
    pdb_parser = Bio.PDB.PDBParser()
    input_struct = pdb_parser.get_structure(pdb_id, pdb_file)
    return input_struct


def display_chain_info(pdb_id, pdb_file):
    """
    Display the chain information within the PDB file for review.

    Args:
        pdb_id: character string identifying the PDB structure.
        pdb_file: character string giving the file path to the PDB file.
    
    Returns:
        Compound information from the PDB file header.
    """
    input_struct = load_pdb_file(pdb_id, pdb_file)
    return input_struct.header["compound"]


def calc_midpoint_coordinate(x, y):
    """
    Calculates the midpoint coordinate between two coordinates in their own space.

    Args:
        x: numerical list of coords.
        y: numerical list of coords.
    
    Returns:
        A list of numerics indicating the midpoint of the two coordinates.
    """
    assert len(x) == len(y)
    mid = []
    for i in range(len(x)):
        pos = (x[i] + y[i]) / 2
        mid.append(pos)
    return mid

aa3to1 = {
    'CYS': 'C', 'ASP': 'D', 'SER': 'S', 'GLN': 'Q', 'LYS': 'K',
    'ILE': 'I', 'PRO': 'P', 'THR': 'T', 'PHE': 'F', 'ASN': 'N', 
    'GLY': 'G', 'HIS': 'H', 'LEU': 'L', 'ARG': 'R', 'TRP': 'W', 
    'ALA': 'A', 'VAL': 'V', 'GLU': 'E', 'TYR': 'Y', 'MET': 'M',
    'SEC': 'U', 'PLY': 'O', 'BMA': 'bma', 'EDO': 'edo', 'GOL': 'gol',
    'FUC': 'fuc', 'HOH': 'hoh', 'MAN': 'man', 'NAG': 'nag', 'NI': 'ni', 
    'CL': 'cl'
    }


def gather_molecular_coordinates(pdb_id, pdb_file, chains_of_interest):
    """
    Collect the atomic coordinates from a given structure (for specificed chains)
    into a dataframe, labeling each atom with its structure, chain, residue, and 
    atom associated information.

    Arguments:
        pdb_id: character string of the PDB structure.
        pdb_file: character string giving the file path to the PDB file.
        chains_of_interset: list of characters identifying which chains to focus on.

    Returns:
        Pandas dataframe with indicated information above in description.
    """
    # Load structure
    pdb_struct = load_pdb_file(pdb_id, pdb_file)
    # Check inputs
    assert isinstance(chains_of_interest, (list))
    # For loop to collect info from chains of interest
    res_df = pd.DataFrame()
    for model in pdb_struct:
        for chain in model:
            if chain.id.lower() not in [x.lower() for x in chains_of_interest]:
                continue
            for residue in chain:
                for atom in residue:
                    atom_dict = {
                        'pdb': pdb_id,
                        'chain': chain.id.lower(),
                        'residnum': residue.id[1],
                        'resid': aa3to1[residue.resname],
                        'atom': atom.id,
                        'coord': [atom.coord]
                    }
                    df = pd.DataFrame.from_dict(atom_dict)
                    res_df = pd.concat([res_df, df], ignore_index = True)
    # Return the dataframe with coordinate information
    return res_df


def identify_primary_interacting_atoms(pdb_id, pdb_file, primary_chains, secondary_chains, distance_threshold = 4.0, CA_dist_threshold = 25.0):
    """
    Identify atoms on primary chains within a distance threshold of atoms present on secondary chains. Additionally, 
    annotate with midpoints of interactions in coordinate space.

    Args:
        pdb_id: character string of the PDB structure.
        pdb_file: character string giving the file path to the PDB file.
        primary_chains: character string, no spaces, of chains in structure considered primary.
        secondary_chains: character string, no spaces, of chains in structure considered secondary.
        distance_threshold: float indicating the angstrom distance threshold to consider for interating.
        CA_dist_threshold: fload minimum distance between CA molecules to be considered for interation.
    
    Returns:
        Dataframe of primary chain atoms with annotations of interacting secondary atoms, residues, and mid-point
        atomic coordinates. Additionally outputs the atomic data gathered during the process for further summary.
    """
    # Format chain information
    primary_chains = list(set([x.lower() for x in primary_chains.strip()]))
    secondary_chains = list(set([x.lower() for x in secondary_chains.strip()]))
    chains_of_interest = list(set(primary_chains + secondary_chains))

    # Check no intersection between primary and secondary
    assert len(primary_chains) + len(secondary_chains) == len(chains_of_interest)
    
    # Gather all atomic data of interest from structure
    atomic_data = gather_molecular_coordinates(pdb_id, pdb_file, chains_of_interest)
    
    # Using for loops, move through all pairwise comparisons between primary and secondary chain atoms,
    # saving only the interations that are within the distance thresholds
    all_prime_data = atomic_data[atomic_data["chain"].isin(primary_chains)]
    all_secondary_data = atomic_data[atomic_data["chain"].isin(secondary_chains)]
    interact_df = pd.DataFrame()
    for prime_chain in primary_chains:
        prime_df = all_prime_data[all_prime_data["chain"] == prime_chain]
        for secondary_chain in secondary_chains:
            second_df = all_secondary_data[all_secondary_data["chain"] == secondary_chain]
            print(f"Checking {pdb_id} for interaction between chain {prime_chain} (length: {len(prime_df)}) and chain {secondary_chain} (length: {len(second_df)}).")
            
            # Check only CA atoms for distances
            prime_ca_df = prime_df[prime_df["atom"] == "CA"]
            second_ca_df = second_df[second_df["atom"] == "CA"]
            near_prime_df = pd.DataFrame()
            near_second_df = pd.DataFrame()

            for i, pri_ca in prime_ca_df.iterrows():
                for j, sec_ca in second_ca_df.iterrows():
                    if np.linalg.norm(pri_ca["coord"] - sec_ca["coord"]) > CA_dist_threshold:
                        continue
                    pndf = pd.DataFrame(pri_ca).T
                    pndf = pndf.drop(["atom", "coord"], axis = 1)
                    near_prime_df = pd.concat([near_prime_df, pndf], ignore_index = True)
                    sndf = pd.DataFrame(sec_ca).T
                    sndf = sndf.drop(["atom", "coord"], axis = 1)
                    near_second_df = pd.concat([near_second_df, sndf], ignore_index = True)
            
            # Filter all atoms for only those associated with residues near interacting chains
            near_prime_df = near_prime_df.drop_duplicates(ignore_index = True)
            near_second_df = near_second_df.drop_duplicates(ignore_index = True)
            # Check for no interations, less continue
            if 0 in (len(near_prime_df), len(near_second_df)):
                continue
            # Filter atoms for only those associated with suspected interacting residues
            prime_filtered_df = pd.merge(prime_df, near_prime_df, on = ["pdb", "chain", "residnum", "resid"], how = "inner")
            second_filtered_df = pd.merge(second_df, near_second_df, on = ["pdb", "chain", "residnum", "resid"], how = "inner")
            print(f"  Reduced comparisons by removing {len(prime_df) - len(prime_filtered_df)} atoms from the primary chain {prime_chain}.")
            print(f"  Reduced comparisons by removing {len(second_df) - len(second_filtered_df)} atoms from the secondary chain {secondary_chain}.")
            
            for i, pri in prime_filtered_df.iterrows():
                for j, sec in second_filtered_df.iterrows():
                    if np.linalg.norm(pri["coord"] - sec["coord"]) > distance_threshold:
                        continue
                    df = pd.DataFrame(pri).T
                    df["int_chain"] = sec["chain"]
                    df["int_residnum"] = sec["residnum"]
                    df["int_resid"] = sec["resid"]
                    df["int_atom"] = sec["atom"]
                    df["int_coord"] = [sec["coord"]]
                    df["mid_coord"] = [calc_midpoint_coordinate(pri["coord"], sec["coord"])]
                    df["int_dist"] = np.linalg.norm(pri["coord"] - sec["coord"])
                    interact_df = pd.concat([interact_df, df], ignore_index = True)
    
    # Return the dataframe with interacting atomic coordinates
    return interact_df, atomic_data


def summarise_interations_to_residues(int_df, atomic_data):
    """
    Summarise the interations identified at the atomic level to residues, coordinates are related to the CA atoms.

    Args:
        int_df: pandas dataframe as output from identify_primary_interacting_atoms function.
        atomic_data: atom data provided from the output of identify_primary_interacting_atoms function.

    Returns:
        A pandas dataframe with interacting residues with atoms within threshold distance to interacting residues.
    """
    # Identify unique residues interacting between primary and secondary chains
    res_df = int_df[["pdb", "chain", "residnum", "resid", "int_chain", "int_residnum", "int_resid", "int_dist"]]
    res_df = res_df.groupby(["pdb", "chain", "residnum", "resid", "int_chain", "int_residnum", "int_resid"]).agg({"int_dist": "min"})
    res_df = res_df.reset_index()

    # Join coordinates of CA atoms for primary and secondary residues
    ca_df = atomic_data.copy()
    ca_df = ca_df[ca_df["atom"] == "CA"]
    ca_df = ca_df.drop(["atom"], axis = 1)
    res_df = pd.merge(res_df, ca_df, on = ["pdb", "chain", "residnum", "resid"], how = "left")

    int_ca_df = ca_df
    int_ca_df = int_ca_df.rename(columns = {"chain": "int_chain", "residnum": "int_residnum", "resid": "int_resid", "coord": "int_coord"})
    res_df = pd.merge(res_df, int_ca_df, on = ["pdb", "int_chain", "int_residnum", "int_resid"], how = "left")

    # Compute distance and midpoints between CA residues
    res_df["CA_dist"] = res_df.apply(lambda row: np.linalg.norm(row["coord"] - row["int_coord"]), axis = 1)
    res_df["CA_midpoint"] = res_df.apply(lambda row: calc_midpoint_coordinate(row["coord"], row["int_coord"]), axis = 1)

    # Return
    return res_df


## Example cell for querying header information about PDB structures

In [4]:
display_chain_info("6XDG", "./pdb_files/6XDG.pdb")

{'1': {'misc': '',
  'molecule': 'spike protein s1',
  'chain': 'e',
  'fragment': 'receptor binding domain (unp residues 319-541)',
  'synonym': 's glycoprotein,e2,peplomer protein,spike glycoprotein',
  'engineered': 'yes'},
 '2': {'misc': '',
  'molecule': 'regn10933 antibody fab fragment light chain',
  'chain': 'd',
  'engineered': 'yes'},
 '3': {'misc': '',
  'molecule': 'regn10933 antibody fab fragment heavy chain',
  'chain': 'b',
  'engineered': 'yes'},
 '4': {'misc': '',
  'molecule': 'regn10987 antibody fab fragment heavy chain',
  'chain': 'c',
  'engineered': 'yes'},
 '5': {'misc': '',
  'molecule': 'regn10987 antibody fab fragment light chain',
  'chain': 'a',
  'engineered': 'yes'}}

# Query PDB Files for interacting residues

This operation will use the functions above to do the following:
1. Iterate over the information in the 'pdb_identifiers.csv' file, taking the PDB ID, primary, and secondary chain info.
2. The PDB structure is loaded and all atomic coordinates are put into a single dataframe.
3. The atomic data is then separated into primary (SARS-CoV RBD) and secondary (antibody or variable fragments) chains.
4. Each atom in the primary chain(s) (often there are multiple) is compared to the secondary chains to determine which atoms are within 4 angstroms between the chains. Each chain is first only compared by the position of the alpha carbons, if alpha carbons are within 25 angstroms between primary and secondary chains, then all atoms from that residue are used in the atomic comparison. This addition to the algorithm significantly reduced the amount of computational time to needed for this approach (run time was just under 1h).
5. Atomic comparison data is then summarised to provide interacting residues across the chains. 
6. This process is repeated for each of the 21 PDB files to get a comprehensive list of all binding atoms and residues between SARS-CoV RBD and different variable binding fragments.

In [5]:
all_interacting_atoms = pd.DataFrame()
all_interacting_residues = pd.DataFrame()

for i, row in pdbids.iterrows():
    pdb = row["PDB_id"]
    primary_chains = row["Primary_Chains"]
    secondary_chains = row["Secondary_Chains"]
    print(f"Starting {pdb} structure:\n  Primary Chains: {primary_chains}\n  Secondary Chains: {secondary_chains}")
    int_data, atomic_data = identify_primary_interacting_atoms(pdb, "./pdb_files/" + pdb + ".pdb", primary_chains, secondary_chains)
    res_data = summarise_interations_to_residues(int_data, atomic_data)
    all_interacting_atoms = pd.concat([all_interacting_atoms, int_data], ignore_index = True)
    all_interacting_residues = pd.concat([all_interacting_residues, res_data], ignore_index = True)

Starting 6XDG structure:
  Primary Chains: e
  Secondary Chains: bd
Checking 6XDG for interaction between chain e (length: 1536) and chain b (length: 1603).
  Reduced comparisons by removing 569 atoms from the primary chain e.
  Reduced comparisons by removing 871 atoms from the secondary chain b.
Checking 6XDG for interaction between chain e (length: 1536) and chain d (length: 1640).
  Reduced comparisons by removing 1222 atoms from the primary chain e.
  Reduced comparisons by removing 1166 atoms from the secondary chain d.
Starting 7ZJL structure:
  Primary Chains: abc
  Secondary Chains: ghijkl
Checking 7ZJL for interaction between chain b (length: 16631) and chain k (length: 3218).
  Reduced comparisons by removing 15203 atoms from the primary chain b.
  Reduced comparisons by removing 1898 atoms from the secondary chain k.
Checking 7ZJL for interaction between chain b (length: 16631) and chain l (length: 3309).
  Reduced comparisons by removing 16610 atoms from the primary chain 



Checking 7KMG for interaction between chain f (length: 1445) and chain e (length: 1644).
  Reduced comparisons by removing 1106 atoms from the primary chain f.
  Reduced comparisons by removing 1136 atoms from the secondary chain e.
Checking 7KMG for interaction between chain f (length: 1445) and chain b (length: 1652).
  Reduced comparisons by removing 516 atoms from the primary chain f.
  Reduced comparisons by removing 1012 atoms from the secondary chain b.
Checking 7KMG for interaction between chain f (length: 1445) and chain d (length: 1665).
  Reduced comparisons by removing 554 atoms from the primary chain f.
  Reduced comparisons by removing 897 atoms from the secondary chain d.
Checking 7KMG for interaction between chain f (length: 1445) and chain a (length: 1705).
  Reduced comparisons by removing 732 atoms from the primary chain f.
  Reduced comparisons by removing 1403 atoms from the secondary chain a.
Checking 7KMG for interaction between chain c (length: 1458) and chain e



Checking 7CM4 for interaction between chain a (length: 1565) and chain i (length: 0).
Checking 7CM4 for interaction between chain a (length: 1565) and chain h (length: 1750).
  Reduced comparisons by removing 581 atoms from the primary chain a.
  Reduced comparisons by removing 972 atoms from the secondary chain h.
Starting 7L7D structure:
  Primary Chains: e
  Secondary Chains: hl




Checking 7L7D for interaction between chain e (length: 1564) and chain h (length: 1753).
  Reduced comparisons by removing 704 atoms from the primary chain e.
  Reduced comparisons by removing 1031 atoms from the secondary chain h.
Checking 7L7D for interaction between chain e (length: 1564) and chain l (length: 1746).
  Reduced comparisons by removing 1191 atoms from the primary chain e.
  Reduced comparisons by removing 1169 atoms from the secondary chain l.
Starting 8SUO structure:
  Primary Chains: a
  Secondary Chains: im
Checking 8SUO for interaction between chain a (length: 1550) and chain i (length: 1709).
  Reduced comparisons by removing 580 atoms from the primary chain a.
  Reduced comparisons by removing 1101 atoms from the secondary chain i.
Checking 8SUO for interaction between chain a (length: 1550) and chain m (length: 1676).
  Reduced comparisons by removing 625 atoms from the primary chain a.
  Reduced comparisons by removing 1023 atoms from the secondary chain m.
Sta



Checking 7B3O for interaction between chain e (length: 2887) and chain h (length: 3284).
  Reduced comparisons by removing 950 atoms from the primary chain e.
  Reduced comparisons by removing 2012 atoms from the secondary chain h.
Checking 7B3O for interaction between chain e (length: 2887) and chain l (length: 3388).
  Reduced comparisons by removing 900 atoms from the primary chain e.
  Reduced comparisons by removing 2193 atoms from the secondary chain l.
Starting 8CWV structure:
  Primary Chains: a
  Secondary Chains: hl
Checking 8CWV for interaction between chain a (length: 1549) and chain h (length: 1611).
  Reduced comparisons by removing 549 atoms from the primary chain a.
  Reduced comparisons by removing 950 atoms from the secondary chain h.
Checking 8CWV for interaction between chain a (length: 1549) and chain l (length: 1650).
  Reduced comparisons by removing 518 atoms from the primary chain a.
  Reduced comparisons by removing 1027 atoms from the secondary chain l.
Start



Checking 7BYR for interaction between chain b (length: 7821) and chain h (length: 807).
  Reduced comparisons by removing 6906 atoms from the primary chain b.
  Reduced comparisons by removing 57 atoms from the secondary chain h.
Checking 7BYR for interaction between chain b (length: 7821) and chain l (length: 809).
  Reduced comparisons by removing 7632 atoms from the primary chain b.
  Reduced comparisons by removing 374 atoms from the secondary chain l.
Checking 7BYR for interaction between chain c (length: 7467) and chain h (length: 807).
  Reduced comparisons by removing 6962 atoms from the primary chain c.
  Reduced comparisons by removing 379 atoms from the secondary chain h.
Checking 7BYR for interaction between chain c (length: 7467) and chain l (length: 809).
  Reduced comparisons by removing 7437 atoms from the primary chain c.
  Reduced comparisons by removing 799 atoms from the secondary chain l.
Checking 7BYR for interaction between chain a (length: 7479) and chain h (len



Checking 7KZB for interaction between chain c (length: 1464) and chain h (length: 1567).
  Reduced comparisons by removing 584 atoms from the primary chain c.
  Reduced comparisons by removing 942 atoms from the secondary chain h.
Checking 7KZB for interaction between chain c (length: 1464) and chain l (length: 1595).
  Reduced comparisons by removing 444 atoms from the primary chain c.
  Reduced comparisons by removing 1021 atoms from the secondary chain l.
Starting 8WFH structure:
  Primary Chains: e
  Secondary Chains: a
Checking 8WFH for interaction between chain e (length: 1754) and chain a (length: 1530).
  Reduced comparisons by removing 421 atoms from the primary chain e.
  Reduced comparisons by removing 464 atoms from the secondary chain a.
Starting 7KZB structure:
  Primary Chains: c
  Secondary Chains: ab




Checking 7KZB for interaction between chain c (length: 1464) and chain b (length: 1386).
  Reduced comparisons by removing 502 atoms from the primary chain c.
  Reduced comparisons by removing 774 atoms from the secondary chain b.
Checking 7KZB for interaction between chain c (length: 1464) and chain a (length: 1095).
  Reduced comparisons by removing 852 atoms from the primary chain c.
  Reduced comparisons by removing 619 atoms from the secondary chain a.
Starting 7D4G structure:
  Primary Chains: b
  Secondary Chains: gn
Checking 7D4G for interaction between chain b (length: 2206) and chain g (length: 788).
  Reduced comparisons by removing 1867 atoms from the primary chain b.
  Reduced comparisons by removing 235 atoms from the secondary chain g.
Checking 7D4G for interaction between chain b (length: 2206) and chain n (length: 931).
  Reduced comparisons by removing 1386 atoms from the primary chain b.
  Reduced comparisons by removing 212 atoms from the secondary chain n.
Starting



Checking 7C8W for interaction between chain b (length: 1544) and chain a (length: 927).
  Reduced comparisons by removing 571 atoms from the primary chain b.
  Reduced comparisons by removing 39 atoms from the secondary chain a.
Starting 6WAQ structure:
  Primary Chains: bd
  Secondary Chains: ac




Checking 6WAQ for interaction between chain b (length: 1563) and chain c (length: 987).
  Reduced comparisons by removing 1537 atoms from the primary chain b.
  Reduced comparisons by removing 904 atoms from the secondary chain c.
Checking 6WAQ for interaction between chain b (length: 1563) and chain a (length: 973).
  Reduced comparisons by removing 344 atoms from the primary chain b.
  Reduced comparisons by removing 80 atoms from the secondary chain a.
Checking 6WAQ for interaction between chain d (length: 1554) and chain c (length: 987).
  Reduced comparisons by removing 350 atoms from the primary chain d.
  Reduced comparisons by removing 86 atoms from the secondary chain c.
Checking 6WAQ for interaction between chain d (length: 1554) and chain a (length: 973).
  Reduced comparisons by removing 1528 atoms from the primary chain d.
  Reduced comparisons by removing 892 atoms from the secondary chain a.
Starting 6ZXN structure:
  Primary Chains: abc
  Secondary Chains: def




Checking 6ZXN for interaction between chain b (length: 16620) and chain e (length: 1752).
  Reduced comparisons by removing 14503 atoms from the primary chain b.
  Reduced comparisons by removing 126 atoms from the secondary chain e.
Checking 6ZXN for interaction between chain b (length: 16620) and chain f (length: 1752).
  Reduced comparisons by removing 16599 atoms from the primary chain b.
  Reduced comparisons by removing 1716 atoms from the secondary chain f.
Checking 6ZXN for interaction between chain b (length: 16620) and chain d (length: 1752).
  Reduced comparisons by removing 16573 atoms from the primary chain b.
  Reduced comparisons by removing 1693 atoms from the secondary chain d.
Checking 6ZXN for interaction between chain c (length: 16620) and chain e (length: 1752).
  Reduced comparisons by removing 15380 atoms from the primary chain c.
  Reduced comparisons by removing 920 atoms from the secondary chain e.
Checking 6ZXN for interaction between chain c (length: 16620) 



Checking 6WAR for interaction between chain k (length: 1621) and chain f (length: 895).
Checking 6WAR for interaction between chain k (length: 1621) and chain l (length: 861).
  Reduced comparisons by removing 794 atoms from the primary chain k.
  Reduced comparisons by removing 137 atoms from the secondary chain l.
Checking 6WAR for interaction between chain k (length: 1621) and chain h (length: 895).
Checking 6WAR for interaction between chain k (length: 1621) and chain b (length: 901).
  Reduced comparisons by removing 1439 atoms from the primary chain k.
  Reduced comparisons by removing 620 atoms from the secondary chain b.
Checking 6WAR for interaction between chain k (length: 1621) and chain j (length: 895).
  Reduced comparisons by removing 1613 atoms from the primary chain k.
  Reduced comparisons by removing 877 atoms from the secondary chain j.
Checking 6WAR for interaction between chain k (length: 1621) and chain p (length: 895).
  Reduced comparisons by removing 1377 atoms

## Merge discovered data with PDB identifier object

In [6]:
output_atoms = pd.merge(pdbids, all_interacting_atoms, left_on = "PDB_id", right_on = "pdb", how = "left")
output_resid = pd.merge(pdbids, all_interacting_residues, left_on = "PDB_id", right_on = "pdb", how = "left")

In [7]:
output_atoms

Unnamed: 0,Antibody,Short_Name,PDB_id,Primary_Chains,Secondary_Chains,pdb,chain,residnum,resid,atom,coord,int_chain,int_residnum,int_resid,int_atom,int_coord,mid_coord,int_dist
0,Casirivimab,CoV_binder_1,6XDG,e,bd,6XDG,e,417,K,CD,"[118.848, 97.167, 126.598]",b,31,D,OD2,"[116.813, 100.267, 125.558]","[117.83050537109375, 98.71699523925781, 126.07...",3.851337
1,Casirivimab,CoV_binder_1,6XDG,e,bd,6XDG,e,417,K,CE,"[119.058, 98.564, 127.154]",b,28,T,OG1,"[118.558, 101.564, 125.912]","[118.80799865722656, 100.06400299072266, 126.5...",3.285203
2,Casirivimab,CoV_binder_1,6XDG,e,bd,6XDG,e,417,K,CE,"[119.058, 98.564, 127.154]",b,31,D,OD2,"[116.813, 100.267, 125.558]","[117.93550109863281, 99.41549682617188, 126.35...",3.238428
3,Casirivimab,CoV_binder_1,6XDG,e,bd,6XDG,e,417,K,NZ,"[118.607, 98.675, 128.569]",b,28,T,OG1,"[118.558, 101.564, 125.912]","[118.58250427246094, 100.1195068359375, 127.24...",3.925348
4,Casirivimab,CoV_binder_1,6XDG,e,bd,6XDG,e,417,K,NZ,"[118.607, 98.675, 128.569]",b,31,D,OD2,"[116.813, 100.267, 125.558]","[117.71000671386719, 99.47100067138672, 127.06...",3.849547
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7828,VHH-55,CoV_binder_33,6WAR,acegikmo,bdfhjlnp,6WAR,i,555,V,CG1,"[-58.633, -53.772, 20.647]",j,99,W,CH2,"[-55.996, -52.351, 23.268]","[-57.31449890136719, -53.061500549316406, 21.9...",3.980283
7829,VHH-55,CoV_binder_33,6WAR,acegikmo,bdfhjlnp,6WAR,i,555,V,CG2,"[-60.513, -52.443, 19.796]",j,100,G,N,"[-57.756, -51.553, 17.302]","[-59.134498596191406, -51.99800109863281, 18.5...",3.822719
7830,VHH-55,CoV_binder_33,6WAR,acegikmo,bdfhjlnp,6WAR,i,555,V,CG2,"[-60.513, -52.443, 19.796]",j,100,G,CA,"[-59.055, -51.132, 16.842]","[-59.784000396728516, -51.787498474121094, 18....",3.545505
7831,VHH-55,CoV_binder_33,6WAR,acegikmo,bdfhjlnp,6WAR,i,557,S,N,"[-56.364, -55.719, 25.255]",j,99,W,CH2,"[-55.996, -52.351, 23.268]","[-56.18000030517578, -54.035003662109375, 24.2...",3.927724


In [8]:
output_resid

Unnamed: 0,Antibody,Short_Name,PDB_id,Primary_Chains,Secondary_Chains,pdb,chain,residnum,resid,int_chain,int_residnum,int_resid,int_dist,coord,int_coord,CA_dist,CA_midpoint
0,Casirivimab,CoV_binder_1,6XDG,e,bd,6XDG,e,417,K,b,28,T,3.285203,"[119.511, 94.487, 125.377]","[119.421, 103.778, 125.522]",9.292567,"[119.46600341796875, 99.13249969482422, 125.44..."
1,Casirivimab,CoV_binder_1,6XDG,e,bd,6XDG,e,417,K,b,31,D,3.238428,"[119.511, 94.487, 125.377]","[113.87, 101.845, 126.984]",9.409757,"[116.69050598144531, 98.16600036621094, 126.18..."
2,Casirivimab,CoV_binder_1,6XDG,e,bd,6XDG,e,417,K,b,102,T,3.538583,"[119.511, 94.487, 125.377]","[115.229, 97.778, 132.133]",8.649262,"[117.3699951171875, 96.13249969482422, 128.755..."
3,Casirivimab,CoV_binder_1,6XDG,e,bd,6XDG,e,453,Y,b,30,S,3.853658,"[113.447, 94.488, 119.599]","[114.209, 104.503, 124.267]",11.075697,"[113.8280029296875, 99.49549865722656, 121.932..."
4,Casirivimab,CoV_binder_1,6XDG,e,bd,6XDG,e,453,Y,b,31,D,2.836727,"[113.447, 94.488, 119.599]","[113.87, 101.845, 126.984]",10.432769,"[113.65850067138672, 98.16650390625, 123.29150..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1461,VHH-55,CoV_binder_33,6WAR,acegikmo,bdfhjlnp,6WAR,o,553,W,p,100,P,3.688311,"[-53.765, 28.442, 30.68]","[-47.299, 33.721, 26.634]",9.276158,"[-50.53199768066406, 31.081501007080078, 28.65..."
1462,VHH-55,CoV_binder_33,6WAR,acegikmo,bdfhjlnp,6WAR,o,553,W,p,100,P,3.688311,"[-53.765, 28.442, 30.68]","[-43.726, 37.899, 24.007]",15.321398,"[-48.74549865722656, 33.17049789428711, 27.343..."
1463,VHH-55,CoV_binder_33,6WAR,acegikmo,bdfhjlnp,6WAR,o,555,V,p,99,W,3.445660,"[-53.419, 32.974, 35.046]","[-51.813, 38.171, 30.744]",6.935077,"[-52.615997314453125, 35.57250213623047, 32.89..."
1464,VHH-55,CoV_binder_33,6WAR,acegikmo,bdfhjlnp,6WAR,o,555,V,p,100,G,3.518310,"[-53.419, 32.974, 35.046]","[-52.806, 34.665, 29.73]",5.612052,"[-53.11249923706055, 33.81949996948242, 32.388..."


## Output to CSV files to 'additional_data'

In [9]:
output_atoms.to_csv("./additional_data/interacting_atoms.csv", index = False)
output_resid.to_csv("./additional_data/interacting_residuals.csv", index = False)

In [10]:
print("Notebook completed.")

Notebook completed.
