In [1]:
# needed modules
import json
from biopandas.pdb import PandasPdb
from rdkit import Chem
import pandas as pd

In [81]:
# needed input
path_to_pdb_file = '../../data/pdb_files_edited/O43570/4HT2_V50.pdb'

path_to_reference = '../../data/optimized_ligands/O43570/4HT2_V50.sdf'

In [82]:
# get structure
ppdb = PandasPdb().read_pdb(path_to_pdb_file)

atom_df = ppdb.df['ATOM']
hetatm_df = ppdb.df['HETATM']

reference = Chem.SDMolSupplier(path_to_reference)[0]
reference = reference.GetConformer()

In [83]:
# calculate distances between reference ligand atoms to all other atoms in the structure
i = 0
series_atoms = {}
series_hetatoms = {}
for position in reference.GetPositions():
    series_atoms[i] = ppdb.distance(xyz=position, records=('ATOM'))
    series_hetatoms[i] = ppdb.distance(xyz=position, records=('HETATM'))
    i += 1
# concatenate into a dataframe
series_atoms = pd.concat(series_atoms, axis=1)
series_hetatoms = pd.concat(series_hetatoms, axis=1)
# assign to each atom the minimum distance to the reference ligand
series_atoms = series_atoms.min(axis=1)
series_hetatoms = series_hetatoms.min(axis=1)
# add column with distances to the 'ATOM' biopandas dataframe
atom_df = pd.concat([series_atoms, atom_df], axis=1)
# add column with distances to the 'HETATM' biopandas dataframe
hetatm_df = pd.concat([series_hetatoms, hetatm_df], axis=1)

In [84]:
# get residue number and chain_id of aa residues that are within 15 A
residue_numbers_atom = atom_df.loc[atom_df[0]<15][[0, 'residue_number', 'chain_id', 'residue_name']].groupby(['residue_number', 'chain_id', 'residue_name']).min()

# coordinates of C_alpha of aa that are within 15 A
coordinates_C_alphas = []

for ids, dist in residue_numbers_atom.itertuples():
    residue_number, chain_id, residue_name = ids[0], ids[1], ids[2]
    coordinates_C_alpha = atom_df.loc[(atom_df['atom_name']=='C') &
                                        (atom_df['residue_number']==residue_number) &
                                        (atom_df['chain_id']==chain_id) &
                                        (atom_df['residue_name']==residue_name)][
                                                                        ['x_coord', 'y_coord', 'z_coord']].iloc[:1].values.tolist()
    if len(coordinates_C_alpha) == 1:
        coordinates_C_alphas += coordinates_C_alpha
        coordinates_C_alphas[-1] = [dist] + coordinates_C_alphas[-1]
    else:
        print(len(coordinates_C_alpha))

coordinates_C_alphas = pd.DataFrame(coordinates_C_alphas, columns=[0, 'x_coord', 'y_coord', 'z_coord'])
coordinates_C_alphas


Unnamed: 0,0,x_coord,y_coord,z_coord
0,9.453561,-11.072,0.057,18.205
1,5.990797,-9.793,-1.295,15.771
2,11.530119,-9.630,-1.828,12.208
3,6.249789,-10.399,-1.714,8.769
4,14.797858,-13.181,0.520,9.124
...,...,...,...,...
152,12.486953,-9.263,0.200,0.142
153,14.534275,11.343,1.928,2.227
154,13.861252,16.513,-0.723,2.876
155,14.111762,12.494,6.106,31.160


In [90]:
# coordinates of the first atom entries of other heterogens included in the binding site definition
coordinates_first_hetatm_entry = hetatm_df.loc[(~hetatm_df['residue_name'].isin(['HOH'])) & (hetatm_df[0]<10.5)][
                                            [0, 'residue_number', 'x_coord', 'y_coord', 'z_coord']]
# put indexes in a column
coordinates_first_hetatm_entry = coordinates_first_hetatm_entry.reset_index()
# get indexes of first atom entry
indexes = coordinates_first_hetatm_entry[['residue_number', 'index']].groupby(['residue_number']).min()['index'].to_list()
# coordinates of first atom entry
coordinates_first_hetatm_entry = coordinates_first_hetatm_entry.loc[coordinates_first_hetatm_entry['index'].isin(indexes)][
                                            [0,'x_coord', 'y_coord', 'z_coord']]
coordinates_first_hetatm_entry

Unnamed: 0,0,x_coord,y_coord,z_coord
0,2.060601,-0.003,-11.009,12.312


In [23]:
# create the docking definition .json file

'''
{'MaximumOverlapVolumeForDocking': '2.9',
 'bindingSite': {'definitionSpheres': [
     {'r': 0.1, 'x': 60.075, 'y': 44.607, 'z': 20.399}]},
 'dataType': 1,
 'extendedBindingSite': {'definitionSpheres': [
   {'r': 0.1, 'x': 64.574, 'y': 38.854, 'z': 21.33}]},
 'version': 3}
'''

docking_definition = {"MaximumOverlapVolumeForDocking": "2.9",
                      "bindingSite": {"definitionSpheres": []},
                      "dataType": 1,
                      "extendedBindingSite": {"definitionSpheres": []},
                      "version": 3}

# add bindingSite definitionSpheres:
for x, y, z in coordinates_C_alphas.loc[coordinates_C_alphas[0]<=6.5][['x_coord', 'y_coord', 'z_coord']].values:
    docking_definition['bindingSite']['definitionSpheres'] += [{'r': 0.1, 'x': x, 'y': y, 'z': z}]

for x, y, z in coordinates_first_hetatm_entry.loc[coordinates_first_hetatm_entry[0]<=6.5][['x_coord', 'y_coord', 'z_coord']].values:
    docking_definition['bindingSite']['definitionSpheres'] += [{'r': 0.1, 'x': x, 'y': y, 'z': z}]

# add extendedBindingSite definitionSpheres:
for x, y, z in coordinates_C_alphas.loc[coordinates_C_alphas[0]<=10.5][['x_coord', 'y_coord', 'z_coord']].values:
    docking_definition['extendedBindingSite']['definitionSpheres'] += [{'r': 0.1, 'x': x, 'y': y, 'z': z}]

for x, y, z in coordinates_first_hetatm_entry.loc[coordinates_first_hetatm_entry[0]<=10.5][['x_coord', 'y_coord', 'z_coord']].values:
    docking_definition['extendedBindingSite']['definitionSpheres'] += [{'r': 0.1, 'x': x, 'y': y, 'z': z}]

docking_definition

{'MaximumOverlapVolumeForDocking': '2.9',
 'bindingSite': {'definitionSpheres': [{'r': 0.1,
    'x': -9.793,
    'y': -1.295,
    'z': 15.771},
   {'r': 0.1, 'x': -10.399, 'y': -1.714, 'z': 8.769},
   {'r': 0.1, 'x': -10.135, 'y': -10.644, 'z': 19.447},
   {'r': 0.1, 'x': -4.972, 'y': -18.444, 'z': 18.196},
   {'r': 0.1, 'x': 2.506, 'y': -17.818, 'z': 21.492},
   {'r': 0.1, 'x': 1.228, 'y': -17.736, 'z': 17.992},
   {'r': 0.1, 'x': -2.539, 'y': -16.046, 'z': 12.478},
   {'r': 0.1, 'x': -5.195, 'y': -12.903, 'z': 6.826},
   {'r': 0.1, 'x': 1.285, 'y': -8.179, 'z': 3.87},
   {'r': 0.1, 'x': 2.501, 'y': -16.35, 'z': 12.828},
   {'r': 0.1, 'x': 5.912, 'y': -15.882, 'z': 18.019},
   {'r': 0.1, 'x': 8.578, 'y': -14.533, 'z': 20.023},
   {'r': 0.1, 'x': 11.158, 'y': -14.038, 'z': 22.425},
   {'r': 0.1, 'x': 5.397, 'y': -10.843, 'z': 25.698},
   {'r': 0.1, 'x': 8.156, 'y': -7.685, 'z': 27.52},
   {'r': 0.1, 'x': 8.755, 'y': -7.567, 'z': 24.396},
   {'r': 0.1, 'x': 7.207, 'y': -5.373, 'z': 22.9

In [12]:
# create the scoring definition .json file

'''
{'bindingSite': {'definitionSpheres': [
   {'r': 0.1, 'x': 53.102, 'y': 55.541, 'z': 14.728},
   ]},
 'dataType': 3,
 'extendedBindingSite': {'definitionSpheres': [
   {'r': 0.1, 'x': 47.396, 'y': 44.034, 'z': 24.396}]},
 'version': 3}
'''

scoring_definition = {"bindingSite": {"definitionSpheres": []},
                      "dataType": 3,
                      "extendedBindingSite": {"definitionSpheres": []},
                      "version": 3}

# add bindingSite definitionSpheres:
for x, y, z in coordinates_C_alphas.loc[coordinates_C_alphas[0]<=6.5][['x_coord', 'y_coord', 'z_coord']].values:
    scoring_definition['bindingSite']['definitionSpheres'] += [{'r': 0.1, 'x': x, 'y': y, 'z': z}]

for x, y, z in coordinates_first_hetatm_entry.loc[coordinates_first_hetatm_entry[0]<=6.5][['x_coord', 'y_coord', 'z_coord']].values:
    scoring_definition['bindingSite']['definitionSpheres'] += [{'r': 0.1, 'x': x, 'y': y, 'z': z}]

# add extendedBindingSite definitionSpheres:
for x, y, z in coordinates_C_alphas.loc[coordinates_C_alphas[0]<=10.5][['x_coord', 'y_coord', 'z_coord']].values:
    scoring_definition['extendedBindingSite']['definitionSpheres'] += [{'r': 0.1, 'x': x, 'y': y, 'z': z}]

for x, y, z in coordinates_first_hetatm_entry.loc[coordinates_first_hetatm_entry[0]<=10.5][['x_coord', 'y_coord', 'z_coord']].values:
    scoring_definition['extendedBindingSite']['definitionSpheres'] += [{'r': 0.1, 'x': x, 'y': y, 'z': z}]

scoring_definition

{'bindingSite': {'definitionSpheres': [{'r': 0.1,
    'x': 64.883,
    'y': 13.968,
    'z': 21.448},
   {'r': 0.1, 'x': 62.405, 'y': 12.936, 'z': 25.005},
   {'r': 0.1, 'x': 64.768, 'y': 11.446, 'z': 26.398},
   {'r': 0.1, 'x': 66.178, 'y': 10.137, 'z': 28.93},
   {'r': 0.1, 'x': 66.697, 'y': 5.386, 'z': 29.556},
   {'r': 0.1, 'x': 70.445, 'y': 1.08, 'z': 26.405},
   {'r': 0.1, 'x': 55.341, 'y': -1.798, 'z': 15.128},
   {'r': 0.1, 'x': 53.802, 'y': 0.824, 'z': 14.838},
   {'r': 0.1, 'x': 53.297, 'y': 3.622, 'z': 14.696},
   {'r': 0.1, 'x': 54.658, 'y': 4.817, 'z': 17.374},
   {'r': 0.1, 'x': 56.848, 'y': 3.525, 'z': 19.186},
   {'r': 0.1, 'x': 60.154, 'y': 3.723, 'z': 17.953},
   {'r': 0.1, 'x': 62.103, 'y': 3.163, 'z': 15.687},
   {'r': 0.1, 'x': 63.675, 'y': 5.745, 'z': 15.657},
   {'r': 0.1, 'x': 64.692, 'y': 5.524, 'z': 18.498},
   {'r': 0.1, 'x': 66.026, 'y': 2.858, 'z': 18.395},
   {'r': 0.1, 'x': 70.406, 'y': 4.308, 'z': 19.914},
   {'r': 0.1, 'x': 62.5, 'y': -0.486, 'z': 10.12

In [24]:
with open('../data/docking_definition_myscript.json', 'w') as outfile:
    json.dump(docking_definition, outfile, indent=4)

In [18]:
help(ppdb.to_pdb)

Help on method to_pdb in module biopandas.pdb.pandas_pdb:

to_pdb(path, records=None, gz=False, append_newline=True) method of biopandas.pdb.pandas_pdb.PandasPdb instance
    Write record DataFrames to a PDB file or gzipped PDB file.
    
    Parameters
    ----------
    path : str
        A valid output path for the pdb file
    
    records : iterable, default: None
        A list of PDB record sections in
        {'ATOM', 'HETATM', 'ANISOU', 'OTHERS'} that are to be written.
        Writes all lines to PDB if `records=None`.
    
    gz : bool, default: False
        Writes a gzipped PDB file if True.
    
    append_newline : bool, default: True
        Appends a new line at the end of the PDB file if True

