-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding Stereoisomer Enumeration #21
Merged
Merged
Changes from 1 commit
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
d797435
Adding Stereoisomer Enumeration and Modifying ChemaXon protonation to…
Feriolet 1fe8b43
Updating pull request and revert protonation method back to ChemAxon
Feriolet 99319e3
Updating run_dock.py to have maxstereoisomer in init_db function
Feriolet ff2a105
Removing complex_id argument
Feriolet b450d77
Update init_db
Feriolet 7b87a18
Fixing error of init_db function
Feriolet 115531f
Fixing error of max_isomer argument parse
Feriolet ca5c668
Save only one stereoisomer for one mol_name (id) based on the best do…
Feriolet 31ac14e
fix stereoisomer name and parameter
Feriolet fc41f5a
fix stereoisomer name and parameter
Feriolet 7573791
Change position of max_stereoisomers in arg list
DrrDom a1cb826
Fix error in add_protonation and change treatment of stereo_id in sav…
DrrDom 7f60c8f
Fix potential issues in get_isomers function
DrrDom 17e01e4
Adapt get_sdf_from_dock_db.py to stereo_id and fix retrieval of arbit…
DrrDom File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ | |
from easydock.auxiliary import take | ||
from rdkit import Chem | ||
from rdkit.Chem import AllChem | ||
from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions | ||
|
||
|
||
def create_db(db_fname, args, args_to_save=(), config_args_to_save=('protein', 'protein_setup'), unique_smi=False): | ||
|
@@ -32,7 +33,8 @@ def create_db(db_fname, args, args_to_save=(), config_args_to_save=('protein', ' | |
cur = conn.cursor() | ||
cur.execute(f"""CREATE TABLE IF NOT EXISTS mols | ||
( | ||
id TEXT PRIMARY KEY, | ||
id TEXT, | ||
stereo_id TEXT, | ||
smi TEXT {'UNIQUE' if unique_smi else ''}, | ||
smi_protonated TEXT, | ||
source_mol_block TEXT, | ||
|
@@ -41,7 +43,8 @@ def create_db(db_fname, args, args_to_save=(), config_args_to_save=('protein', ' | |
pdb_block TEXT, | ||
mol_block TEXT, | ||
dock_time REAL, | ||
time TEXT | ||
time TEXT, | ||
PRIMARY KEY (id, stereo_id) | ||
)""") | ||
|
||
# this will create a setup table with the first item in YAML format which contains all input args, and additional | ||
|
@@ -136,6 +139,18 @@ def restore_setup_from_db(db_fname): | |
|
||
return d, tmpfiles | ||
|
||
def get_isomers(mol): | ||
opts = StereoEnumerationOptions(tryEmbedding=True,maxIsomers=32,rand=0xf00d) | ||
# this is a workaround for rdkit issue - if a double bond has STEREOANY it will cause errors at | ||
# stereoisomer enumeration, we replace STEREOANY with STEREONONE in these cases | ||
try: | ||
isomers = tuple(EnumerateStereoisomers(mol, options=opts)) | ||
except RuntimeError: | ||
for bond in mol[1].GetBonds(): | ||
if bond.GetStereo() == Chem.BondStereo.STEREOANY: | ||
bond.SetStereo(Chem.rdChem.BondStereo.STEREONONE) | ||
isomers = tuple(EnumerateStereoisomers(mol,options=opts)) | ||
return isomers | ||
|
||
def init_db(db_fname, input_fname, prefix=None): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
|
@@ -144,15 +159,17 @@ def init_db(db_fname, input_fname, prefix=None): | |
data_smi = [] # non 3D structures | ||
data_mol = [] # 3D structures | ||
for mol, mol_name in read_input.read_input(input_fname): | ||
smi = Chem.MolToSmiles(mol, isomericSmiles=True) | ||
if prefix: | ||
mol_name = f'{prefix}-{mol_name}' | ||
if mol_is_3d(mol): | ||
data_mol.append((mol_name, smi, Chem.MolToMolBlock(mol))) | ||
else: | ||
data_smi.append((mol_name, smi)) | ||
cur.executemany(f'INSERT INTO mols (id, smi) VALUES(?, ?)', data_smi) | ||
cur.executemany(f'INSERT INTO mols (id, smi, source_mol_block) VALUES(?, ?, ?)', data_mol) | ||
isomers = get_isomers(mol) | ||
for stereo_index, stereo_mol in enumerate(isomers): | ||
smi = Chem.MolToSmiles(stereo_mol, isomericSmiles=True) | ||
if prefix: | ||
mol_name = f'{prefix}-{mol_name}' | ||
if mol_is_3d(mol): | ||
data_mol.append((mol_name, stereo_index, smi, Chem.MolToMolBlock(stereo_mol))) | ||
else: | ||
data_smi.append((mol_name, stereo_index, smi)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if prefix:
mol_name = f'{prefix}-{mol_name}'
if mol_is_3d(mol):
data_mol.append((mol_name, 0, smi, Chem.MolToMolBlock(stereo_mol)))
else:
isomers = get_isomers(mol, max_isomers=max_isomers)
for stereo_index, stereo_mol in enumerate(isomers):
smi = Chem.MolToSmiles(stereo_mol, isomericSmiles=True)
data_smi.append((mol_name, stereo_index, smi))
|
||
cur.executemany(f'INSERT INTO mols (id, stereo_id, smi) VALUES(?, ?, ?)', data_smi) | ||
cur.executemany(f'INSERT INTO mols (id, stereo_id, smi, source_mol_block) VALUES(?, ?, ?, ?)', data_mol) | ||
conn.commit() | ||
|
||
|
||
|
@@ -166,7 +183,7 @@ def get_protonation_arg_value(db_conn): | |
return not d['no_protonation'] | ||
|
||
|
||
def update_db(db_conn, mol_id, data, table_name='mols', commit=True): | ||
def update_db(db_conn, mol_id, data, table_name='mols', commit=True,complex_id=True): | ||
""" | ||
|
||
:param db_conn: | ||
|
@@ -176,13 +193,17 @@ def update_db(db_conn, mol_id, data, table_name='mols', commit=True): | |
:return: | ||
""" | ||
if data: | ||
if complex_id and '_' in mol_id: | ||
mol_id, stereo_index = mol_id.rsplit('_',1) | ||
else: | ||
stereo_index = '' | ||
cols, values = zip(*data.items()) | ||
db_conn.execute(f"""UPDATE {table_name} | ||
SET {', '.join(['%s = ?'] * len(cols))}, | ||
time = CURRENT_TIMESTAMP | ||
WHERE | ||
id = ? | ||
""" % cols, list(values) + [mol_id]) | ||
id = ? AND stereo_id = ? | ||
""" % cols, list(values) + [mol_id,stereo_index]) | ||
if commit: | ||
db_conn.commit() | ||
|
||
|
@@ -242,24 +263,27 @@ def select_mols_to_dock(db_conn, table_name='mols', add_sql=None): | |
smi_field_name = 'smi_protonated' if protonation_status else 'smi' | ||
mol_field_name = 'source_mol_block_protonated' if protonation_status else 'source_mol_block' | ||
|
||
sql = f"""SELECT id, {smi_field_name}, {mol_field_name} | ||
sql = f"""SELECT id, stereo_id, {smi_field_name}, {mol_field_name} | ||
FROM {table_name} | ||
WHERE docking_score IS NULL AND | ||
(({smi_field_name} IS NOT NULL AND {smi_field_name != ''}) OR | ||
({mol_field_name} IS NOT NULL AND {mol_field_name != ''})) """ | ||
if isinstance(add_sql, str) and add_sql: | ||
sql += add_sql | ||
for mol_id, smi, mol_block in cur.execute(sql): | ||
for mol_id, stereo_index, smi, mol_block in cur.execute(sql): | ||
if mol_block is None: | ||
mol = Chem.MolFromSmiles(smi) | ||
else: | ||
mol = Chem.MolFromMolBlock(mol_block, removeHs=False) | ||
if mol: | ||
mol.SetProp('_Name', mol_id) | ||
if stereo_index == '' or stereo_index == None: | ||
mol.SetProp('_Name', mol_id) | ||
else: | ||
mol.SetProp('_Name', mol_id + '_' + stereo_index) | ||
yield mol | ||
|
||
|
||
def add_protonation(db_fname, tautomerize=True, table_name='mols', add_sql=''): | ||
def add_protonation(db_fname, tautomerize=True, table_name='mols', add_sql='',complex_id=True): | ||
''' | ||
Protonate SMILES by Chemaxon cxcalc utility to get molecule ionization states at pH 7.4 | ||
:param db_fname: | ||
|
@@ -274,7 +298,7 @@ def add_protonation(db_fname, tautomerize=True, table_name='mols', add_sql=''): | |
try: | ||
cur = conn.cursor() | ||
|
||
sql = f"""SELECT smi, source_mol_block, id | ||
sql = f"""SELECT smi, source_mol_block, id, stereo_id | ||
FROM {table_name} | ||
WHERE docking_score is NULL AND smi_protonated is NULL """ | ||
sql += add_sql | ||
|
@@ -286,14 +310,14 @@ def add_protonation(db_fname, tautomerize=True, table_name='mols', add_sql=''): | |
|
||
smi_ids = [] | ||
mol_ids = [] | ||
for i, (smi, mol_block, mol_name) in enumerate(data_list): | ||
for i, (smi, mol_block, mol_name, stereo_index) in enumerate(data_list): | ||
if mol_block is None: | ||
smi_ids.append(mol_name) | ||
# add missing mol blocks | ||
m = Chem.MolFromSmiles(data_list[i][0]) | ||
m.SetProp('_Name', mol_name) | ||
m_block = Chem.MolToMolBlock(m) | ||
data_list[i] = (smi, m_block, mol_name) | ||
data_list[i] = (smi, m_block, mol_name, stereo_index) | ||
else: | ||
mol_ids.append(mol_name) | ||
smi_ids = set(smi_ids) | ||
|
@@ -302,19 +326,36 @@ def add_protonation(db_fname, tautomerize=True, table_name='mols', add_sql=''): | |
output_data_smi = [] | ||
output_data_mol = [] | ||
with tempfile.NamedTemporaryFile(suffix='.smi', mode='w', encoding='utf-8') as tmp: | ||
fd1, inputs = tempfile.mkstemp() | ||
fd, output = tempfile.mkstemp() # use output file to avoid overflow of stdout in extreme cases | ||
try: | ||
for smi, _, mol_id in data_list: | ||
tmp.write(f'{smi}\t{mol_id}\n') | ||
# for smi, _, mol_id in data_list: | ||
# tmp.write(f'{smi}\t{mol_id}\n') | ||
# tmp.flush() | ||
# cmd_run = ['cxcalc', '-S', '--ignore-error', 'majormicrospecies', '-H', '7.4', '-K', | ||
# f'{"-M" if tautomerize else ""}', tmp.name] | ||
# with open(output, 'w') as file: | ||
# subprocess.run(cmd_run, stdout=file, text=True) | ||
with open(inputs,'w') as input_file: | ||
for smi, _, mol_id, stereo_index in data_list: | ||
input_file.write(f'{smi}\t{mol_id}\t{stereo_index}\n') | ||
tmp.flush() | ||
cmd_run = ['cxcalc', '-S', '--ignore-error', 'majormicrospecies', '-H', '7.4', '-K', | ||
f'{"-M" if tautomerize else ""}', tmp.name] | ||
with open(output, 'w') as file: | ||
subprocess.run(cmd_run, stdout=file, text=True) | ||
for mol in Chem.SDMolSupplier(output, sanitize=False): | ||
cmd_run = ['python','/home/user/miniforge3/envs/easydock/lib/python3.9/site-packages/easydock/dimorphite_dl.py', \ | ||
'--smiles_file',inputs,'--output_file',output,'--min_ph','7.3','--max_ph','7.5','--max_variants','1','--silent'] | ||
subprocess.run(cmd_run,text=True) | ||
|
||
# for mol in Chem.SDMolSupplier(output, sanitize=False): | ||
for index,line in enumerate([x.strip() for x in open(output,'r').readlines()]): | ||
smi = line.split()[0] | ||
mol = Chem.MolFromSmiles(line,sanitize=False) | ||
if mol: | ||
mol_name = mol.GetProp('_Name') | ||
smi = mol.GetPropsAsDict().get('MAJORMS', None) | ||
if complex_id and len(mol.GetProp('_Name').split()) >1: | ||
mol_name, stereo_index = mol.GetProp('_Name').rsplit(maxsplit=1) | ||
else: | ||
mol_name = mol.GetProp('_Name') | ||
stereo_index = '' | ||
|
||
# smi = mol.GetPropsAsDict().get('MAJORMS', None) | ||
if smi is not None: | ||
try: | ||
cansmi = Chem.CanonSmiles(smi) | ||
|
@@ -323,7 +364,7 @@ def add_protonation(db_fname, tautomerize=True, table_name='mols', add_sql=''): | |
f'could not be read by RDKit. The molecule was skipped.\n') | ||
continue | ||
if mol_name in smi_ids: | ||
output_data_smi.append((cansmi, mol_name)) | ||
output_data_smi.append((cansmi, mol_name, stereo_index)) | ||
elif mol_name in mol_ids: | ||
try: | ||
# mol block in chemaxon sdf is an input molecule but with 2d structure | ||
|
@@ -338,25 +379,29 @@ def add_protonation(db_fname, tautomerize=True, table_name='mols', add_sql=''): | |
ref_mol = Chem.RemoveHs(Chem.MolFromSmiles(smi)) | ||
mol = AllChem.AssignBondOrdersFromTemplate(ref_mol, mol3d) | ||
Chem.AssignStereochemistryFrom3D(mol) # not sure whether it is necessary | ||
output_data_mol.append((cansmi, Chem.MolToMolBlock(mol), mol_name)) | ||
output_data_mol.append((cansmi, Chem.MolToMolBlock(mol), mol_name,stereo_index)) | ||
except ValueError: | ||
continue | ||
finally: | ||
os.remove(output) | ||
os.close(fd) | ||
os.remove(inputs) | ||
os.close(fd1) | ||
|
||
cur.executemany(f"""UPDATE {table_name} | ||
SET | ||
smi_protonated = ? | ||
WHERE | ||
id = ? | ||
id = ? AND | ||
stereo_id = ? | ||
""", output_data_smi) | ||
cur.executemany(f"""UPDATE {table_name} | ||
SET | ||
smi_protonated = ?, | ||
source_mol_block_protonated = ? | ||
WHERE | ||
id = ? | ||
id = ? AND | ||
stereo_id = ? | ||
""", output_data_mol) | ||
conn.commit() | ||
|
||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
max_isomers
should be added to arguments and elevated to the level of script arguments (argparse arguments) with the default value 1.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add please spaces after the commas