# Create Hbond network data

In [1]:
import ast
import multiprocessing

import matplotlib
import matplotlib.pyplot as plt
import MDAnalysis as mda
import MDAnalysis.analysis.hbonds as hbonds
import mdtraj as md
import numpy as np
import pandas as pd
from joblib import Parallel, delayed


In [2]:
# Directory / file-name stems of the trajectories processed by this notebook.
# Each row holds the three runs that share one prefix. The run suffixes are
# not uniformly md1-md3 (e.g. md4, md5-md7), so every entry is kept verbatim.
traj_notes = [
    # 5NJY, pH70
    '5NJY_pH70_md1', '5NJY_pH70_md2', '5NJY_pH70_md3',
    '5NJY_F238L_pH70_md1', '5NJY_F238L_pH70_md2', '5NJY_F238L_pH70_md3',
    '5NJY_I233T_pH70_md4', '5NJY_I233T_pH70_md2', '5NJY_I233T_pH70_md3',
    '5NJY_F238LI233T_pH70_md1', '5NJY_F238LI233T_pH70_md2', '5NJY_F238LI233T_pH70_md3',
    # 5NJY, pH46
    '5NJY_pH46_md1', '5NJY_pH46_md2', '5NJY_pH46_md3',
    '5NJY_F238L_pH46_md1', '5NJY_F238L_pH46_md2', '5NJY_F238L_pH46_md3',
    '5NJY_I233T_pH46_md1', '5NJY_I233T_pH46_md2', '5NJY_I233T_pH46_md3',
    '5NJY_F238LI233T_pH46_md1', '5NJY_F238LI233T_pH46_md2', '5NJY_F238LI233T_pH46_md3',
    # 4HFI, pH46
    '4HFI_pH46_md1', '4HFI_pH46_md2', '4HFI_pH46_md3',
    '4HFI_F238L_pH46_md1', '4HFI_F238L_pH46_md2', '4HFI_F238L_pH46_md3',
    '4HFI_I233T_pH46_md1', '4HFI_I233T_pH46_md2', '4HFI_I233T_pH46_md3',
    '4HFI_F238LI233T_pH46_md1', '4HFI_F238LI233T_pH46_md2', '4HFI_F238LI233T_pH46_md3',
    # 4NPQ, pH70
    '4NPQ_pH70_md5', '4NPQ_pH70_md6', '4NPQ_pH70_md7',
    '4NPQ_F238L_pH70_md3', '4NPQ_F238L_pH70_md4', '4NPQ_F238L_pH70_md5',
    '4NPQ_I233T_pH70_md3', '4NPQ_I233T_pH70_md4', '4NPQ_I233T_pH70_md5',
    '4NPQ_F238LI233T_pH70_md3', '4NPQ_F238LI233T_pH70_md4', '4NPQ_F238LI233T_pH70_md5',
]

In [3]:
# Numbering constants shared by the network builder and its helpers.
_RESSEQ_FIRST = 198        # lowest resSeq accepted into the network
_RESSEQ_LAST = 315         # highest resSeq accepted into the network
_RESSEQ_SHIFT = 5          # subtracted from resSeq when forming a network label
_CHAIN_STRIDE = 311        # label offset added per chainID
_N_CHAINS = 5              # number of per-chain label blocks in the matrix
_FRAME_TIME_STEP = 10000   # traj_time spacing between frames in the list files


def _read_interaction_list(path):
    """Read one interaction list CSV and parse its 'counterpart' column into Python objects."""
    interaction_dataframe = pd.read_csv(path)
    # The column stores Python literals as text; literal_eval parses them
    # without executing arbitrary code.
    interaction_dataframe['counterpart'] = interaction_dataframe['counterpart'].apply(ast.literal_eval)
    return interaction_dataframe


def _mark_interactions(interaction_dataframe, table, hbond_network_dataframe, time):
    """Set network cells to 1 for every in-range pair listed at ``time``.

    ``interaction_dataframe`` supplies 'resid', 'traj_time' and 'counterpart'
    columns; both 'resid' and the counterpart ids are used as row labels of
    ``table``, whose rows provide ``resSeq`` and ``chainID``.
    ``hbond_network_dataframe`` is modified in place.
    """
    # Both masks depend only on `time`, so build them once per call.
    network_rows_at_time = hbond_network_dataframe.traj_time == time / _FRAME_TIME_STEP
    list_rows_at_time = interaction_dataframe['traj_time'] == time
    for residue_id in set(interaction_dataframe['resid']):
        counterpart_lists = interaction_dataframe[
            (interaction_dataframe['resid'] == residue_id) & list_rows_at_time
        ]['counterpart']
        for counterpart_list in counterpart_lists:
            for counterpart_id in counterpart_list:
                source = table.loc[residue_id]
                target = table.loc[counterpart_id]
                if not (_RESSEQ_FIRST <= target.resSeq <= _RESSEQ_LAST
                        and _RESSEQ_FIRST <= source.resSeq <= _RESSEQ_LAST):
                    continue
                # Same label scheme as the matrix columns: resSeq + chainID*311 - 5.
                residue1 = source.resSeq + source.chainID * _CHAIN_STRIDE - _RESSEQ_SHIFT
                residue2 = target.resSeq + target.chainID * _CHAIN_STRIDE - _RESSEQ_SHIFT
                hbond_network_dataframe.loc[
                    network_rows_at_time & (hbond_network_dataframe.residue == residue1),
                    residue2,
                ] = 1


def create_Hbond_network_analysis_data(traj_note, location = '/media/scottzhuang/data/MD/', skip=10):
    """Build the per-frame residue interaction matrix and write it to hbond_network.csv.

    For every frame a 590 x 590 block of zeros is created (118 residue labels
    for each of 5 chains). A cell is set to 1 when the per-residue
    '<resid>_hbond_list.csv' or '<resid>_water_bridge_list.csv' files list an
    interaction between the row residue and the column residue at that frame.
    Frame k is matched against traj_time == 10000 * k in the list files and is
    stored with traj_time == k in the output.

    Parameters
    ----------
    traj_note : str
        Name of the trajectory sub-directory under ``location``; also the stem
        of the '.system.pdb' and '.skip<skip>.system.xtc' files inside it.
    location : str
        Root directory (with trailing separator) holding all trajectories.
    skip : int
        Stride encoded in the trajectory file name.

    Returns
    -------
    None
        The table is written to ``location + traj_note + '/hbond_network.csv'``.

    Notes
    -----
    * Frames are collected in a list and concatenated once; ``DataFrame.append``
      no longer exists in pandas >= 2.0 and was quadratic when used in a loop.
    * The list files are read and parsed once instead of once per frame.
    """
    top_location = traj_note + '/' + traj_note + ".system.pdb"
    traj_location = traj_note + '/' + traj_note + ".skip" + str(skip) + ".system.xtc"
    traj = md.load(location + traj_location, top=location + top_location)
    table, _ = traj.topology.to_dataframe()
    n_frames = traj.n_frames

    # Labels 193..310 for chain 0, then shifted by 311 for each further chain.
    chain_labels = np.arange(_RESSEQ_FIRST - _RESSEQ_SHIFT, _RESSEQ_LAST - _RESSEQ_SHIFT + 1)
    residue_labels = np.asarray(
        [chain_labels + _CHAIN_STRIDE * chain for chain in range(_N_CHAINS)]
    ).flatten()
    n_labels = len(residue_labels)

    frame_networks = []
    for frame_index in range(n_frames):
        frame_network = pd.DataFrame(np.zeros((n_labels, n_labels)), columns=residue_labels)
        frame_network['traj_time'] = float(frame_index)
        frame_network['residue'] = residue_labels
        frame_networks.append(frame_network)
    # pd.concat rejects an empty list; fall back to an empty frame as before.
    if frame_networks:
        hbond_network_dataframe = pd.concat(frame_networks, ignore_index=True)
    else:
        hbond_network_dataframe = pd.DataFrame()

    # Load every list file once (only when there is at least one frame, so a
    # zero-frame trajectory still does not touch the list files).
    # NOTE(review): the range stops at 314 although the resSeq filter accepts
    # 315 -- confirm whether a '315_*_list.csv' pair should be included.
    interaction_lists = []
    if n_frames > 0:
        for resid in range(198, 315):
            list_prefix = location + traj_note + '/' + str(resid)
            interaction_lists.append((
                _read_interaction_list(list_prefix + '_hbond_list.csv'),
                _read_interaction_list(list_prefix + '_water_bridge_list.csv'),
            ))

    for time in range(0, n_frames * _FRAME_TIME_STEP, _FRAME_TIME_STEP):
        for hbond_dataframe, water_bridge_dataframe in interaction_lists:
            _mark_interactions(hbond_dataframe, table, hbond_network_dataframe, time)
            _mark_interactions(water_bridge_dataframe, table, hbond_network_dataframe, time)

    hbond_network_dataframe.to_csv(location + traj_note + '/hbond_network.csv')
    print('finishing!')

In [None]:
# Build hbond_network.csv for every trajectory, running up to cpu_count() jobs at once.
# Parallel/delayed and multiprocessing come from the notebook's import cell.
num_cores = multiprocessing.cpu_count()
# The trailing ';' stops the notebook from echoing the list of None return values.
Parallel(n_jobs=num_cores)(delayed(create_Hbond_network_analysis_data)(traj_note) for traj_note in traj_notes);

finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!
finishing!


In [4]:
def create_hbond_contact_map(traj_note, location = '/media/scottzhuang/data/MD/', skip=10):
    """Collapse hbond_network.csv into per-pair interaction fractions and write hbond_contact_map.csv.

    For every network column (residue2) and every row residue (residue1) that
    has a non-zero entry in that column, the number of such rows is divided by
    the number of frames of the '.ca' trajectory. The result has one row per
    (residue1, residue2) pair with columns 'residue1', 'residue2' and
    'interaction'.

    Parameters
    ----------
    traj_note : str
        Name of the trajectory sub-directory under ``location``; also the stem
        of the '.ca.pdb' and '.skip<skip>.ca.xtc' files inside it.
    location : str
        Root directory (with trailing separator) holding all trajectories.
    skip : int
        Stride encoded in the trajectory file name.

    Returns
    -------
    None
        The table is written to ``location + traj_note + '/hbond_contact_map.csv'``.

    Notes
    -----
    Pairs are accumulated in a dict and turned into a DataFrame once. Growing
    the frame with ``pd.concat`` inside the loop was quadratic and concatenated
    onto an initially empty frame, which recent pandas versions warn about.
    """
    top_location = traj_note + '/' + traj_note + ".ca.pdb"
    traj_location = traj_note + '/' + traj_note + ".skip" + str(skip) + ".ca.xtc"
    # The trajectory is loaded only to obtain the frame count used as denominator.
    traj = md.load(location + traj_location, top=location + top_location)
    n_frames = traj.n_frames

    hbond_network_dataframe = pd.read_csv(location + traj_note + '/hbond_network.csv')
    # 'Unnamed: 0' is the index column written by to_csv; none of these three are residue columns.
    non_residue_columns = ('traj_time', 'residue', 'Unnamed: 0')

    # (residue1, residue2) -> accumulated fraction; dicts keep insertion order,
    # so the output rows appear in the order the pairs are first encountered.
    interaction_by_pair = {}
    for column in hbond_network_dataframe.columns:
        if column in non_residue_columns:
            continue
        # Row residues with a non-zero entry in this column, one per matching row.
        residue1s = hbond_network_dataframe.loc[hbond_network_dataframe[column] != 0, 'residue'].values
        residue2 = int(column)
        for residue1 in set(residue1s):
            # Count only within the already-filtered rows; no full-table re-scan is needed.
            interaction_sum = int((residue1s == residue1).sum())
            pair = (residue1, residue2)
            interaction_by_pair[pair] = interaction_by_pair.get(pair, 0) + interaction_sum / n_frames

    hbond_contact_map = pd.DataFrame(
        [[residue1, residue2, interaction] for (residue1, residue2), interaction in interaction_by_pair.items()],
        columns=['residue1', 'residue2', 'interaction'],
    )
    hbond_contact_map.to_csv(location + traj_note + '/hbond_contact_map.csv')
    print('finishing! ' + traj_note)

In [5]:
# Build hbond_contact_map.csv for every trajectory, running up to cpu_count() jobs at once.
# Parallel/delayed and multiprocessing come from the notebook's import cell.
num_cores = multiprocessing.cpu_count()
# The trailing ';' stops the notebook from echoing the list of None return values.
Parallel(n_jobs=num_cores)(delayed(create_hbond_contact_map)(traj_note) for traj_note in traj_notes);

finishing! 5NJY_I233T_pH70_md2
finishing! 5NJY_F238L_pH70_md2
finishing! 5NJY_F238L_pH70_md3
finishing! 5NJY_pH46_md1
finishing! 5NJY_F238L_pH70_md1
finishing! 5NJY_I233T_pH70_md4
finishing! 5NJY_pH70_md2
finishing! 5NJY_I233T_pH70_md3
finishing! 5NJY_F238LI233T_pH70_md2
finishing! 5NJY_pH70_md3
finishing! 5NJY_F238LI233T_pH70_md1
finishing! 5NJY_F238LI233T_pH70_md3
finishing! 5NJY_F238L_pH46_md1
finishing! 5NJY_pH46_md2
finishing! 5NJY_pH46_md3
finishing! 5NJY_pH70_md1
finishing! 4HFI_F238L_pH46_md2
finishing! 4HFI_pH46_md3
finishing! 4HFI_F238L_pH46_md1
finishing! 4HFI_I233T_pH46_md2
finishing! 4HFI_F238L_pH46_md3
finishing! 4HFI_pH46_md2
finishing! 4HFI_I233T_pH46_md1
finishing! 4HFI_pH46_md1
finishing! 5NJY_F238LI233T_pH46_md3
finishing! 5NJY_F238LI233T_pH46_md1
finishing! 5NJY_I233T_pH46_md1
finishing! 5NJY_F238L_pH46_md2
finishing! 5NJY_I233T_pH46_md2
finishing! 5NJY_I233T_pH46_md3
finishing! 5NJY_F238LI233T_pH46_md2
finishing! 5NJY_F238L_pH46_md3
finishing! 4NPQ_pH70_md5
finishi

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]