ci-lab-cz · DrrDom · Mar 14, 2024 · Feb 27, 2024 · Feb 28, 2024 · Feb 28, 2024
diff --git a/easydock/database.py b/easydock/database.py
@@ -12,6 +12,7 @@
 from easydock.auxiliary import take
 from rdkit import Chem
 from rdkit.Chem import AllChem
+from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions
 
 
 def create_db(db_fname, args, args_to_save=(), config_args_to_save=('protein', 'protein_setup'), unique_smi=False):
@@ -32,7 +33,8 @@ def create_db(db_fname, args, args_to_save=(), config_args_to_save=('protein', '
     cur = conn.cursor()
     cur.execute(f"""CREATE TABLE IF NOT EXISTS mols
                 (
-                 id TEXT PRIMARY KEY,
+                 id TEXT,
+                 stereo_id TEXT,
                  smi TEXT {'UNIQUE' if unique_smi else ''},
                  smi_protonated TEXT,
                  source_mol_block TEXT,
@@ -41,7 +43,8 @@ def create_db(db_fname, args, args_to_save=(), config_args_to_save=('protein', '
                  pdb_block TEXT,
                  mol_block TEXT,
                  dock_time REAL,
-                 time TEXT
+                 time TEXT,
+                 PRIMARY KEY (id, stereo_id)
                 )""")
 
     # this will create a setup table with the first item in YAML format which contains all input args, and additional
@@ -136,6 +139,18 @@ def restore_setup_from_db(db_fname):
 
     return d, tmpfiles
 
+def get_isomers(mol):
+    opts = StereoEnumerationOptions(tryEmbedding=True,maxIsomers=32,rand=0xf00d)
+    # this is a workaround for rdkit issue - if a double bond has STEREOANY it will cause errors at
+    # stereoisomer enumeration, we replace STEREOANY with STEREONONE in these cases
+    try:
+        isomers = tuple(EnumerateStereoisomers(mol, options=opts))
+    except RuntimeError:
+        for bond in mol[1].GetBonds():
+            if bond.GetStereo() == Chem.BondStereo.STEREOANY:
+                bond.SetStereo(Chem.rdChem.BondStereo.STEREONONE)
+        isomers = tuple(EnumerateStereoisomers(mol,options=opts))
+    return isomers
 
 def init_db(db_fname, input_fname, prefix=None):
 
@@ -144,15 +159,17 @@ def init_db(db_fname, input_fname, prefix=None):
     data_smi = []  # non 3D structures
     data_mol = []  # 3D structures
     for mol, mol_name in read_input.read_input(input_fname):
-        smi = Chem.MolToSmiles(mol, isomericSmiles=True)
-        if prefix:
-            mol_name = f'{prefix}-{mol_name}'
-        if mol_is_3d(mol):
-            data_mol.append((mol_name, smi, Chem.MolToMolBlock(mol)))
-        else:
-            data_smi.append((mol_name, smi))
-    cur.executemany(f'INSERT INTO mols (id, smi) VALUES(?, ?)', data_smi)
-    cur.executemany(f'INSERT INTO mols (id, smi, source_mol_block) VALUES(?, ?, ?)', data_mol)
+        isomers = get_isomers(mol)
+        for stereo_index, stereo_mol in enumerate(isomers):
+            smi = Chem.MolToSmiles(stereo_mol, isomericSmiles=True)
+            if prefix:
+                mol_name = f'{prefix}-{mol_name}'
+            if mol_is_3d(mol):
+                data_mol.append((mol_name, stereo_index, smi, Chem.MolToMolBlock(stereo_mol)))
+            else:
+                data_smi.append((mol_name, stereo_index, smi))
+    cur.executemany(f'INSERT INTO mols (id, stereo_id, smi) VALUES(?, ?, ?)', data_smi)
+    cur.executemany(f'INSERT INTO mols (id, stereo_id, smi, source_mol_block) VALUES(?, ?, ?, ?)', data_mol)
     conn.commit()
 
 
@@ -166,7 +183,7 @@ def get_protonation_arg_value(db_conn):
     return not d['no_protonation']
 
 
-def update_db(db_conn, mol_id, data, table_name='mols', commit=True):
+def update_db(db_conn, mol_id, data, table_name='mols', commit=True,complex_id=True):
     """
 
     :param db_conn:
@@ -176,13 +193,17 @@ def update_db(db_conn, mol_id, data, table_name='mols', commit=True):
     :return:
     """
     if data:
+        if complex_id and '_' in mol_id:
+            mol_id, stereo_index = mol_id.rsplit('_',1)
+        else:
+            stereo_index = ''
         cols, values = zip(*data.items())
         db_conn.execute(f"""UPDATE {table_name}
                            SET {', '.join(['%s = ?'] * len(cols))},
                                time = CURRENT_TIMESTAMP
                            WHERE
-                               id = ?
-                        """ % cols, list(values) + [mol_id])
+                               id = ? AND stereo_id = ?
+                        """ % cols, list(values) + [mol_id,stereo_index])
         if commit:
             db_conn.commit()
 
@@ -242,24 +263,27 @@ def select_mols_to_dock(db_conn, table_name='mols', add_sql=None):
     smi_field_name = 'smi_protonated' if protonation_status else 'smi'
     mol_field_name = 'source_mol_block_protonated' if protonation_status else 'source_mol_block'
 
-    sql = f"""SELECT id, {smi_field_name}, {mol_field_name}
+    sql = f"""SELECT id, stereo_id, {smi_field_name}, {mol_field_name}
               FROM {table_name}
               WHERE docking_score IS NULL AND 
                     (({smi_field_name} IS NOT NULL AND {smi_field_name != ''}) OR 
                      ({mol_field_name} IS NOT NULL AND {mol_field_name != ''})) """
     if isinstance(add_sql, str) and add_sql:
         sql += add_sql
-    for mol_id, smi, mol_block in cur.execute(sql):
+    for mol_id, stereo_index, smi, mol_block in cur.execute(sql):
         if mol_block is None:
             mol = Chem.MolFromSmiles(smi)
         else:
             mol = Chem.MolFromMolBlock(mol_block, removeHs=False)
         if mol:
-            mol.SetProp('_Name', mol_id)
+            if stereo_index == '' or stereo_index == None:
+                mol.SetProp('_Name', mol_id)
+            else:
+                mol.SetProp('_Name', mol_id + '_' + stereo_index)
             yield mol
 
 
-def add_protonation(db_fname, tautomerize=True, table_name='mols', add_sql=''):
+def add_protonation(db_fname, tautomerize=True, table_name='mols', add_sql='',complex_id=True):
     '''
     Protonate SMILES by Chemaxon cxcalc utility to get molecule ionization states at pH 7.4
     :param db_fname:
@@ -274,7 +298,7 @@ def add_protonation(db_fname, tautomerize=True, table_name='mols', add_sql=''):
     try:
         cur = conn.cursor()
 
-        sql = f"""SELECT smi, source_mol_block, id 
+        sql = f"""SELECT smi, source_mol_block, id, stereo_id 
                   FROM {table_name} 
                   WHERE docking_score is NULL AND smi_protonated is NULL """
         sql += add_sql
@@ -286,14 +310,14 @@ def add_protonation(db_fname, tautomerize=True, table_name='mols', add_sql=''):
 
         smi_ids = []
         mol_ids = []
-        for i, (smi, mol_block, mol_name) in enumerate(data_list):
+        for i, (smi, mol_block, mol_name, stereo_index) in enumerate(data_list):
             if mol_block is None:
                 smi_ids.append(mol_name)
                 # add missing mol blocks
                 m = Chem.MolFromSmiles(data_list[i][0])
                 m.SetProp('_Name', mol_name)
                 m_block = Chem.MolToMolBlock(m)
-                data_list[i] = (smi, m_block, mol_name)
+                data_list[i] = (smi, m_block, mol_name, stereo_index)
             else:
                 mol_ids.append(mol_name)
         smi_ids = set(smi_ids)
@@ -302,19 +326,36 @@ def add_protonation(db_fname, tautomerize=True, table_name='mols', add_sql=''):
         output_data_smi = []
         output_data_mol = []
         with tempfile.NamedTemporaryFile(suffix='.smi', mode='w', encoding='utf-8') as tmp:
+            fd1, inputs = tempfile.mkstemp()
             fd, output = tempfile.mkstemp()  # use output file to avoid overflow of stdout in extreme cases
             try:
-                for smi, _, mol_id in data_list:
-                    tmp.write(f'{smi}\t{mol_id}\n')
+                # for smi, _, mol_id in data_list:
+                #     tmp.write(f'{smi}\t{mol_id}\n')
+                # tmp.flush()
+                # cmd_run = ['cxcalc', '-S', '--ignore-error', 'majormicrospecies', '-H', '7.4', '-K',
+                #            f'{"-M" if tautomerize else ""}', tmp.name]
+                # with open(output, 'w') as file:
+                #     subprocess.run(cmd_run, stdout=file, text=True)
+                with open(inputs,'w') as input_file:
+                    for smi, _, mol_id, stereo_index in data_list:
+                        input_file.write(f'{smi}\t{mol_id}\t{stereo_index}\n')
                 tmp.flush()
-                cmd_run = ['cxcalc', '-S', '--ignore-error', 'majormicrospecies', '-H', '7.4', '-K',
-                           f'{"-M" if tautomerize else ""}', tmp.name]
-                with open(output, 'w') as file:
-                    subprocess.run(cmd_run, stdout=file, text=True)
-                for mol in Chem.SDMolSupplier(output, sanitize=False):
+                cmd_run = ['python','/home/user/miniforge3/envs/easydock/lib/python3.9/site-packages/easydock/dimorphite_dl.py', \
+                            '--smiles_file',inputs,'--output_file',output,'--min_ph','7.3','--max_ph','7.5','--max_variants','1','--silent']
+                subprocess.run(cmd_run,text=True)
+
+                # for mol in Chem.SDMolSupplier(output, sanitize=False):
+                for index,line in enumerate([x.strip() for x in open(output,'r').readlines()]):
+                    smi = line.split()[0]
+                    mol = Chem.MolFromSmiles(line,sanitize=False)
                     if mol:
-                        mol_name = mol.GetProp('_Name')
-                        smi = mol.GetPropsAsDict().get('MAJORMS', None)
+                        if complex_id and len(mol.GetProp('_Name').split()) >1:
+                            mol_name, stereo_index = mol.GetProp('_Name').rsplit(maxsplit=1)
+                        else:
+                            mol_name = mol.GetProp('_Name')
+                            stereo_index = ''
+
+                        # smi = mol.GetPropsAsDict().get('MAJORMS', None)
                         if smi is not None:
                             try:
                                 cansmi = Chem.CanonSmiles(smi)
@@ -323,7 +364,7 @@ def add_protonation(db_fname, tautomerize=True, table_name='mols', add_sql=''):
                                                  f'could not be read by RDKit. The molecule was skipped.\n')
                                 continue
                             if mol_name in smi_ids:
-                                output_data_smi.append((cansmi, mol_name))
+                                output_data_smi.append((cansmi, mol_name, stereo_index))
                             elif mol_name in mol_ids:
                                 try:
                                     # mol block in chemaxon sdf is an input molecule but with 2d structure
@@ -338,25 +379,29 @@ def add_protonation(db_fname, tautomerize=True, table_name='mols', add_sql=''):
                                     ref_mol = Chem.RemoveHs(Chem.MolFromSmiles(smi))
                                     mol = AllChem.AssignBondOrdersFromTemplate(ref_mol, mol3d)
                                     Chem.AssignStereochemistryFrom3D(mol)  # not sure whether it is necessary
-                                    output_data_mol.append((cansmi, Chem.MolToMolBlock(mol), mol_name))
+                                    output_data_mol.append((cansmi, Chem.MolToMolBlock(mol), mol_name,stereo_index))
                                 except ValueError:
                                     continue
             finally:
                 os.remove(output)
                 os.close(fd)
+                os.remove(inputs)
+                os.close(fd1)
 
         cur.executemany(f"""UPDATE {table_name}
                        SET 
                            smi_protonated = ?
                        WHERE
-                           id = ?
+                           id = ? AND
+                           stereo_id = ?
                     """, output_data_smi)
         cur.executemany(f"""UPDATE {table_name}
                        SET 
                            smi_protonated = ?, 
                            source_mol_block_protonated = ?
                        WHERE
-                           id = ?
+                           id = ? AND
+                           stereo_id = ?
                     """, output_data_mol)
         conn.commit()