In [1]:
import numpy as np
import MDAnalysis as mda
from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import HydrogenBondAnalysis

In [2]:
workdir = "data/cWza-S355C_conformation1_0050/"
u = mda.Universe(workdir+'md_100ns.tpr', workdir+'md_100ns.xtc',in_memory=True)



In [128]:
# Check segments IDs
u.atoms.segments
list(u.atoms.segments)

[<Segment seg_0_Protein_chain_A>,
 <Segment seg_10_K>,
 <Segment seg_11_CL>,
 <Segment seg_1_Protein_chain_B>,
 <Segment seg_2_Protein_chain_C>,
 <Segment seg_3_Protein_chain_D>,
 <Segment seg_4_Protein_chain_E>,
 <Segment seg_5_Protein_chain_F>,
 <Segment seg_6_Protein_chain_G>,
 <Segment seg_7_Protein_chain_H>,
 <Segment seg_8_POPC>,
 <Segment seg_9_SOL>]

In [323]:
hbonds = HydrogenBondAnalysis(universe=u)

protein_hydrogens_sel = hbonds.guess_hydrogens("protein")
protein_acceptors_sel = hbonds.guess_acceptors("protein")
hbonds.hydrogens_sel = f"not backbone and segid seg_0_Protein_chain_A"
hbonds.acceptors_sel = f"not backbone and segid seg_1_Protein_chain_B"
hbonds.run()

<MDAnalysis.analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis at 0x7febe9f052b0>

* Filter by type, select the top 16 
* Filter IDs accoridng to type

In [351]:
hbond_counting_raw = hbonds.count_by_type()
hbond_counting_sorted = sorted(hbond_counting_raw, key=lambda x: int(x[-1]), reverse=True)
hbond_counting_sorted



[array(['VAL:opls_135', 'VAL:opls_140', '22218'], dtype='<U21'),
 array(['ILE:opls_135', 'ILE:opls_140', '19202'], dtype='<U21'),
 array(['LEU:opls_135', 'LEU:opls_140', '17892'], dtype='<U21'),
 array(['THR:opls_135', 'THR:opls_140', '11665'], dtype='<U21'),
 array(['VAL:opls_137', 'VAL:opls_135', '9985'], dtype='<U21'),
 array(['ARG:opls_300', 'ARG:opls_301', '9881'], dtype='<U21'),
 array(['PRO:opls_136', 'PRO:opls_140', '7694'], dtype='<U21'),
 array(['TRP:opls_145', 'TRP:opls_146', '7581'], dtype='<U21'),
 array(['LEU:opls_137', 'LEU:opls_135', '7109'], dtype='<U21'),
 array(['LYS:opls_136', 'LYS:opls_140', '6832'], dtype='<U21'),
 array(['THR:opls_224B', 'THR:opls_140', '6699'], dtype='<U21'),
 array(['VAL:opls_224B', 'VAL:opls_140', '6582'], dtype='<U21'),
 array(['ARG:opls_308', 'ARG:opls_140', '6180'], dtype='<U21'),
 array(['LEU:opls_136', 'LEU:opls_140', '5682'], dtype='<U21'),
 array(['LEU:opls_224B', 'LEU:opls_140', '5563'], dtype='<U21'),
 array(['THR:opls_158', 'THR:opls

In [344]:
data_destilated = [x for x in hbond_counting_sorted if int(x[-1])>1000]
set_donors_acceptors = set([(x[0].split(':')[0], x[1].split(':')[0]) for x in data_destilated])
print(set_donors_acceptors)

{('GLN', 'GLN'), ('ILE', 'ILE'), ('PRO', 'PRO'), ('HIS', 'HIS'), ('ARG', 'ARG'), ('VAL', 'VAL'), ('TYR', 'GLU'), ('ARG', 'GLU'), ('GLU', 'GLU'), ('CYS', 'CYS'), ('LYS', 'LYS'), ('HIS', 'ASP'), ('LEU', 'LEU'), ('ALA', 'ALA'), ('ASN', 'ASN'), ('ARG', 'ASP'), ('ASP', 'ASP'), ('TRP', 'TRP'), ('GLY', 'GLY'), ('TYR', 'TYR'), ('THR', 'THR')}


In [352]:
y = set([x[0] for x in set_donors_acceptors] + [x[1] for x in set_donors_acceptors])

In [354]:
print(' '.join(y))

GLN ASP LEU THR ARG ASN HIS TYR TRP PRO ILE GLY GLU LYS CYS VAL ALA


New analysis with fewer residues

In [355]:
hbonds = HydrogenBondAnalysis(universe=u)

protein_hydrogens_sel = hbonds.guess_hydrogens("protein")
protein_acceptors_sel = hbonds.guess_acceptors("protein")

In [356]:
hbonds.hydrogens_sel = f"not backbone and resname GLN ASP LEU THR ARG ASN HIS TYR TRP PRO ILE GLY GLU LYS CYS VAL ALA"
hbonds.acceptors_sel = f"not backbone and resname GLN ASP LEU THR ARG ASN HIS TYR TRP PRO ILE GLY GLU LYS CYS VAL ALA"
hbonds.run()

<MDAnalysis.analysis.hydrogenbonds.hbond_analysis.HydrogenBondAnalysis at 0x7febe965fb80>

In [357]:
hbond_counting_raw = hbonds.count_by_type()
hbond_counting_sorted = sorted(hbond_counting_raw, key=lambda x: int(x[-1]), reverse=True)
hbond_counting_sorted

[array(['VAL:opls_135', 'VAL:opls_140', '22218'], dtype='<U21'),
 array(['ILE:opls_135', 'ILE:opls_140', '19202'], dtype='<U21'),
 array(['LEU:opls_135', 'LEU:opls_140', '17892'], dtype='<U21'),
 array(['THR:opls_135', 'THR:opls_140', '11665'], dtype='<U21'),
 array(['VAL:opls_137', 'VAL:opls_135', '9985'], dtype='<U21'),
 array(['ARG:opls_300', 'ARG:opls_301', '9881'], dtype='<U21'),
 array(['PRO:opls_136', 'PRO:opls_140', '7694'], dtype='<U21'),
 array(['TRP:opls_145', 'TRP:opls_146', '7581'], dtype='<U21'),
 array(['LEU:opls_137', 'LEU:opls_135', '7109'], dtype='<U21'),
 array(['LYS:opls_136', 'LYS:opls_140', '6832'], dtype='<U21'),
 array(['THR:opls_224B', 'THR:opls_140', '6699'], dtype='<U21'),
 array(['VAL:opls_224B', 'VAL:opls_140', '6582'], dtype='<U21'),
 array(['ARG:opls_308', 'ARG:opls_140', '6180'], dtype='<U21'),
 array(['LEU:opls_136', 'LEU:opls_140', '5682'], dtype='<U21'),
 array(['LEU:opls_224B', 'LEU:opls_140', '5563'], dtype='<U21'),
 array(['THR:opls_158', 'THR:opls

In [358]:
def process_hbond_metadata(hbond_metadata):
    processed_data = []
    for x in hbond_metadata:
        #hbond_donor_acceptor = [x.segid.split('_')[-1], str(x.resnum), x.resname, x.name]
        if x.resnum%32 == 0:
            resnum = 32
        else:
            resnum = x.resnum%32
        hbond_donor_acceptor = [x.segid.split('_')[-1],str(resnum), x.resname, x.name]
        hbond_identifier = '-'.join(hbond_donor_acceptor)
        processed_data.append(hbond_identifier)
        
    return processed_data

In [359]:
import pandas as pd
df = pd.DataFrame(columns = ['frame', 'donor', 'donor-H', 'acceptor'])

for i in range(len(hbonds.results.hbonds)):
    hbond = hbonds.results.hbonds[i]
    frame, donor_ix, hydrogen_ix, acceptor_ix = hbond[:4].astype(int)

    u.trajectory[frame]
    hbond_metadata = u.atoms[[donor_ix, hydrogen_ix, acceptor_ix]]
    
    y1, y2, y3 = process_hbond_metadata(hbond_metadata)
    if '-'.join(y1.split('-')[:3]) != '-'.join(y3.split('-')[:3]):
        df.loc[i] = [frame] + [y1, y2, y3]

In [360]:
df

Unnamed: 0,frame,donor,donor-H,acceptor
13,0,A-7-ASN-ND2,A-7-ASN-HD21,B-13-LEU-HD12
36,0,A-26-THR-OG1,A-26-THR-HG1,H-25-GLU-OE1
39,0,A-28-ARG-NH2,A-28-ARG-HH22,A-24-THR-OG1
104,0,C-10-ILE-CA,C-10-ILE-HA,C-14-VAL-HG23
126,0,C-26-THR-OG1,C-26-THR-HG1,B-25-GLU-OE1
...,...,...,...,...
374534,1000,F-21-HIS-ND1,F-21-HIS-HD1,G-22-ASP-OD2
374567,1000,G-21-HIS-ND1,G-21-HIS-HD1,H-22-ASP-OD1
374614,1000,H-20-VAL-CA,H-20-VAL-HA,H-23-LEU-HD13
374618,1000,H-23-LEU-CD1,H-23-LEU-HD13,H-20-VAL-HA


In [361]:
df1 = df[['donor', 'acceptor']].drop_duplicates()

In [381]:
df1[['ILE' in x for x in df1['donor'].values]]

Unnamed: 0,donor,acceptor
104,C-10-ILE-CA,C-14-VAL-HG23
2814,C-10-ILE-CB,C-7-ASN-HA
4471,A-30-ILE-CG1,A-31-LYS-HG1
7520,E-10-ILE-CD,E-13-LEU-HD11
10698,B-10-ILE-CA,B-13-LEU-HD11
...,...,...
326667,E-10-ILE-CD,E-7-ASN-HA
326820,H-30-ILE-CG2,H-32-THR-H
335094,D-17-ILE-CA,D-20-VAL-HG21
335339,A-30-ILE-CG2,A-32-THR-H


In [362]:
len(df), len(df1)

(18863, 989)

In [375]:
len(set(['-'.join(x.split('-')[1:]) for x in df1['donor'].values]))
set(['-'.join(x.split('-')[1:]) for x in df1['donor'].values])

{'1-ALA-CB',
 '10-ILE-CA',
 '10-ILE-CB',
 '10-ILE-CD',
 '10-ILE-CG1',
 '10-ILE-CG2',
 '10-ILE-N',
 '11-CYS-CA',
 '11-CYS-CB',
 '11-CYS-SG',
 '12-GLN-CB',
 '12-GLN-CD',
 '12-GLN-CG',
 '12-GLN-NE2',
 '13-LEU-CA',
 '13-LEU-CB',
 '13-LEU-CD1',
 '13-LEU-CD2',
 '13-LEU-CG',
 '14-VAL-CA',
 '14-VAL-CB',
 '14-VAL-CG1',
 '14-VAL-CG2',
 '15-PRO-CA',
 '15-PRO-CB',
 '15-PRO-CD',
 '15-PRO-CG',
 '16-THR-CA',
 '16-THR-CB',
 '16-THR-CG2',
 '16-THR-N',
 '16-THR-OG1',
 '17-ILE-CA',
 '17-ILE-CB',
 '17-ILE-CD',
 '17-ILE-CG1',
 '17-ILE-CG2',
 '17-ILE-N',
 '18-THR-CA',
 '18-THR-CB',
 '18-THR-CG2',
 '18-THR-OG1',
 '19-GLY-CA',
 '2-PRO-CB',
 '2-PRO-CG',
 '20-VAL-CA',
 '20-VAL-CG1',
 '20-VAL-CG2',
 '20-VAL-N',
 '21-HIS-CA',
 '21-HIS-CB',
 '21-HIS-CD2',
 '21-HIS-CE1',
 '21-HIS-N',
 '21-HIS-ND1',
 '22-ASP-CA',
 '22-ASP-CB',
 '22-ASP-CG',
 '23-LEU-CA',
 '23-LEU-CB',
 '23-LEU-CD1',
 '23-LEU-CD2',
 '23-LEU-CG',
 '24-THR-CA',
 '24-THR-CB',
 '24-THR-CG2',
 '24-THR-OG1',
 '25-GLU-CA',
 '25-GLU-CB',
 '25-GLU-CD',
 '25-G

In [374]:
len(set(['-'.join(x.split('-')[1:]) for x in df1['acceptor'].values]))
set(['-'.join(x.split('-')[1:]) for x in df1['acceptor'].values])

{'1-ALA-HB2',
 '10-ILE-H',
 '10-ILE-HA',
 '10-ILE-HB',
 '10-ILE-HD1',
 '10-ILE-HD2',
 '10-ILE-HD3',
 '10-ILE-HG12',
 '10-ILE-HG21',
 '10-ILE-HG22',
 '10-ILE-HG23',
 '11-CYS-HA',
 '11-CYS-HB1',
 '11-CYS-HB2',
 '11-CYS-HG',
 '12-GLN-HB1',
 '12-GLN-HB2',
 '12-GLN-HE21',
 '12-GLN-HE22',
 '12-GLN-HG1',
 '12-GLN-HG2',
 '12-GLN-OE1',
 '13-LEU-HA',
 '13-LEU-HB1',
 '13-LEU-HB2',
 '13-LEU-HD11',
 '13-LEU-HD12',
 '13-LEU-HD13',
 '13-LEU-HD21',
 '13-LEU-HD22',
 '13-LEU-HD23',
 '13-LEU-HG',
 '14-VAL-HA',
 '14-VAL-HG11',
 '14-VAL-HG12',
 '14-VAL-HG13',
 '14-VAL-HG21',
 '14-VAL-HG22',
 '14-VAL-HG23',
 '15-PRO-HA',
 '15-PRO-HB1',
 '15-PRO-HB2',
 '15-PRO-HD1',
 '15-PRO-HD2',
 '15-PRO-HG1',
 '16-THR-CG2',
 '16-THR-HA',
 '16-THR-HB',
 '16-THR-HG1',
 '16-THR-HG21',
 '16-THR-HG22',
 '16-THR-HG23',
 '16-THR-OG1',
 '17-ILE-H',
 '17-ILE-HA',
 '17-ILE-HB',
 '17-ILE-HD1',
 '17-ILE-HD2',
 '17-ILE-HD3',
 '17-ILE-HG11',
 '17-ILE-HG12',
 '17-ILE-HG21',
 '17-ILE-HG22',
 '17-ILE-HG23',
 '18-THR-HA',
 '18-THR-HB',
 '1

In [None]:
# Intra-chain
## Backbone - Backbone

## Backbone - SC

## SC - SC

# Inter-chain

## Backbone - Backbone

## Backbone - SC

## SC - SC

In [114]:
selection = u.select_atoms('protein')
selection

<AtomGroup with 4296 atoms>

In [127]:
selection = u.select_atoms('protein and segid seg_0_Protein_chain_A')
selection

<AtomGroup with 537 atoms>

In [116]:
selection = u.select_atoms('backbone and segid seg_0_Protein_chain_A')
selection

<AtomGroup with 127 atoms>

In [121]:
selection = u.select_atoms('not backbone and segid seg_0_Protein_chain_A')
selection

<AtomGroup with 410 atoms>

In [98]:
Chains = ['A', 'B','C','D','E','F','G','H']

protein_chains = {}
for i in range(len(Chains)):
    chain_segid = 'seg_'+str(i)+'_Protein_chain_' + Chains[i]
    protein_chains[Chains[i]] = u.select_atoms('segid ' + chain_segid)