In [1]:
import numpy as np
import pandas as pd
import mdtraj
import sys
import os

sys.path.append('../..')
from openabc.forcefields.parsers import MOFFParser
from openabc.utils.helper_functions import parse_pdb, write_pdb

## Example of how to get native pairs for HP1beta dimer

The file hp1b_AA.pdb is the built PDB structure. It is composed of two identical monomers. However, because each C-terminal tail overlaps with the CSD of the other monomer, parsing the full dimer can affect the native pairs that the shadow algorithm finds within the CSD.

To deal with this issue, we first obtain the native pairs within each CD and CSD by parsing a single monomer, and then obtain the native pairs between the two CSDs by parsing the two CSDs together as a single structure.

In [2]:
# Extract chain A from the dimer and parse it on its own, so that the native
# pairs of the monomer are found without the other chain being present.
hp1beta_dimer = parse_pdb('hp1b_AA.pdb')
hp1beta_monomer = hp1beta_dimer.loc[hp1beta_dimer['chainID'] == 'A'].copy()
write_pdb(hp1beta_monomer, 'hp1b_AA_monomer.pdb')
hp1beta_monomer_parser = MOFFParser.from_atomistic_pdb('hp1b_AA_monomer.pdb', 'hp1b_CA_monomer.pdb')

# Index ranges of the CD and the CSD; they are compared directly against the
# a1/a2 indices of the native pairs below.
cd = np.arange(20, 79)
csd = np.arange(116, 175)

# Only keep native pairs whose two atoms are both inside the CD or both inside
# the CSD. The rows are selected with a single boolean mask instead of being
# appended one at a time, which avoids re-allocating the frame for every match
# and keeps the original column dtypes.
monomer_pairs = hp1beta_monomer_parser.native_pairs
monomer_a1 = monomer_pairs['a1'].astype(int)
monomer_a2 = monomer_pairs['a2'].astype(int)
within_cd = monomer_a1.isin(cd) & monomer_a2.isin(cd)
within_csd = monomer_a1.isin(csd) & monomer_a2.isin(csd)
hp1beta_monomer_native_pairs = monomer_pairs.loc[within_cd | within_csd].reset_index(drop=True)

Parse configuration with default settings.
Get native pairs with shadow algorithm.


In [3]:
# Extract the CSD residues of both chains and parse them together as a single
# structure to get the native pairs between the two CSDs.
# `csd + 1` shifts the indices in `csd` by one to match the resSeq column
# (assumes resSeq is the index plus one -- TODO confirm against the PDB file).
hp1beta_dimer_csd = hp1beta_dimer.loc[hp1beta_dimer['resSeq'].isin(csd + 1)].copy()
write_pdb(hp1beta_dimer_csd, 'hp1b_AA_dimer_csd.pdb')
hp1beta_dimer_csd_parser = MOFFParser.from_atomistic_pdb('hp1b_AA_dimer_csd.pdb', 'hp1b_CA_dimer_csd.pdb')

# Keep only the pairs that connect the two CSDs: after ordering each pair, the
# lower index must be below len(csd) and the higher index at or above it
# (presumably the first len(csd) atoms belong to the first chain -- verify).
# A boolean mask replaces the row-by-row append used previously.
dimer_csd_pairs = hp1beta_dimer_csd_parser.native_pairs
dimer_csd_a1 = dimer_csd_pairs['a1'].astype(int)
dimer_csd_a2 = dimer_csd_pairs['a2'].astype(int)
lower_index = np.minimum(dimer_csd_a1, dimer_csd_a2)
higher_index = np.maximum(dimer_csd_a1, dimer_csd_a2)
is_inter_csd = (lower_index < len(csd)) & (higher_index >= len(csd))
hp1beta_dimer_inter_csd_native_pairs = dimer_csd_pairs.loc[is_inter_csd].reset_index(drop=True)


Parse configuration with default settings.
Get native pairs with shadow algorithm.
Distance between atom 342 and 817 is 0.07814300072265688 nm, which is smaller than the radius (0.1 nm), so we ignore this atom pair. This means maybe the radius is too large or atoms 342 and 817 are too close.


In [4]:
# Combine all the native pairs into one table indexed over the whole dimer and
# write it to hp1beta_dimer_native_pairs.csv.
n_res_each_monomer = len(hp1beta_monomer_parser.atoms.index)
# Offset of the CSD within one monomer, taken from `csd` itself so that it
# cannot drift from the range defined above (it was hard-coded as 116).
csd_start = int(csd[0])
n_csd = len(csd)

# Rows are collected in a list and turned into a DataFrame once at the end,
# instead of growing the DataFrame one row at a time.
dimer_pair_records = []

# Pairs found in the monomer apply to both chains: add each pair once as is
# and once shifted by the number of atoms in a monomer.
for a1, a2, mu in zip(hp1beta_monomer_native_pairs['a1'],
                      hp1beta_monomer_native_pairs['a2'],
                      hp1beta_monomer_native_pairs['mu']):
    a1, a2, mu = int(a1), int(a2), float(mu)
    dimer_pair_records.append((a1, a2, mu))
    dimer_pair_records.append((a1 + n_res_each_monomer, a2 + n_res_each_monomer, mu))

# Pairs between the two CSDs are indexed within the CSD-only structure; map
# them back to indices in the full dimer.
for a1, a2, mu in zip(hp1beta_dimer_inter_csd_native_pairs['a1'],
                      hp1beta_dimer_inter_csd_native_pairs['a2'],
                      hp1beta_dimer_inter_csd_native_pairs['mu']):
    a1, a2, mu = int(a1), int(a2), float(mu)
    # Order the pair first: the inter-CSD filter accepts pairs in either
    # order, so the check below must not depend on how the row was stored.
    if a1 > a2:
        a1, a2 = a2, a1
    assert (a1 < n_csd) and (a2 >= n_csd), \
        f'pair ({a1}, {a2}) does not connect the two CSDs'
    a1 += csd_start
    a2 += n_res_each_monomer + csd_start - n_csd
    dimer_pair_records.append((a1, a2, mu))

# Explicit dtypes keep the atom indices as integers in the CSV (appending
# mixed int/float rows upcasts them to floats) and also cover the empty case.
hp1beta_dimer_native_pairs = (
    pd.DataFrame(dimer_pair_records, columns=['a1', 'a2', 'mu'])
    .astype({'a1': int, 'a2': int, 'mu': float})
    .sort_values(by=['a1', 'a2'])
)
hp1beta_dimer_native_pairs.to_csv('hp1beta_dimer_native_pairs.csv', index=False)