Merge pull request #53 from choderalab/fragment

Use OESubSearch to map atoms
choderalab · Mar 23, 2018 · 2c81aca · 2c81aca
2 parents 687fc05 + 5800f34
commit 2c81aca
Show file tree

Hide file tree

Showing 2 changed files with 134 additions and 53 deletions.
diff --git a/torsionfit/qmscan/utils.py b/torsionfit/qmscan/utils.py
@@ -1,9 +1,11 @@
-from openeye import oechem, oeiupac, oedepict
 import os
-import numpy as np
-from torsionfit.utils import logger
 import time
 
+from openeye import oechem, oeiupac, oedepict
+from torsionfit.utils import logger
+from openmoltools import openeye
+
+
 def write_oedatabase(moldb, ofs, mlist, size):
     """
     This function writes out a new oedatabase from an existing database
@@ -84,28 +86,40 @@ def new_output_stream(outname):
     return ofs
 
 
-def to_oemol(filename, title=None):
+def to_oemol(filename, title=True, verbose=True):
     """Create OEMol from file. If more than one mol in file, return list of OEMols.
 
     Parameters
     ----------
     filename: str
         absolute path to
-    title: str
+    title: str, title
         title for molecule. If None, IUPAC name will be given as title.
 
     Returns
     -------
     mollist: list
         list of OEMol for multiple molecules. OEMol if file only has one molecule.
     """
+
+    if not os.path.exists(filename):
+        raise Exception("File {} not found".format(filename))
+    if verbose:
+        logger().info("Loading molecules from {}".format(filename))
+
     ifs = oechem.oemolistream(filename)
-    moldb = oechem.OEMolDatabase(ifs)
+    #moldb = oechem.OEMolDatabase(ifs)
     mollist = []
 
-    for mol in moldb.GetOEMols():
-        molecule = oechem.OEMol(mol)
-        mollist.append(normalize_molecule(molecule, title))
+    molecule = oechem.OECreateOEGraphMol()
+    while oechem.OEReadMolecule(ifs, molecule):
+        molecule_copy = oechem.OEMol(molecule)
+        if title:
+            title = molecule_copy.GetTitle()
+            if verbose:
+                logger().infor("Reading molecule {}".format(title))
+
+        mollist.append(normalize_molecule(molecule_copy, title))
 
     if len(mollist) <= 1:
         mollist = mollist[0]
@@ -149,7 +163,6 @@ def normalize_molecule(molecule, title=None):
     # Check for any missing atom names, if found reassign all of them.
     if any([atom.GetName() == '' for atom in molcopy.GetAtoms()]):
         oechem.OETriposAtomNames(molcopy)
-
     return molcopy
 
 
@@ -294,83 +307,137 @@ def mol_to_tagged_smiles(infile, outfile):
 
 def get_atom_map(tagged_smiles, molecule=None):
     """
-    Maps tag index in SMILES to atom index in OEMol.
+    Returns a dictionary that maps tag on SMILES to atom index in molecule.
     Parameters
     ----------
     tagged_smiles: str
-        index-tagged explicit Hydrogen SMILES string
-    molecule: OEMOl
-        The OEMol to get the map for. Default is None.
-        When None, a new OEMol will be generated from SMILES. If an OEMol is provided, the map will be to that OEMol.
+        index-tagged explicit hydrogen SMILES string
+    molecule: OEMol
+        molecule to generate map for. If None, a new OEMol will be generated from the tagged SMILES, the map will map to
+        this molecule and it will be returned.
 
     Returns
     -------
-    map: dict
-        maps tag to index in returned OEMol. {map_idx: atom_idx}
-    molecule: OEMol. Only if no OEMol was provided.
-
+    atom_map: dict
+        a dictionary that maps tag to atom index {tag:idx}
+    molecule: OEMol
+        If a molecule was not provided, the generated molecule will be returned.
     """
-    target = molecule
-    if not molecule:
-        target = oechem.OEMol()
-        oechem.OESmilesToMol(target, tagged_smiles)
-
-    # create maximum common substructure object
-    mcss = oechem.OEMCSSearch(tagged_smiles, oechem.OEMCSType_Approximate)
-    mcss.SetMaxMatches(1)
-    mcss.SetMinAtoms(target.GetMaxAtomIdx())
-    # Scoring function
-    mcss.SetMCSFunc(oechem.OEMCSMaxAtoms())
+    if molecule is None:
+        molecule = openeye.smiles_to_oemol(tagged_smiles)
+
+    ss = oechem.OESubSearch(tagged_smiles)
+    oechem.OEPrepareSearch(molecule, ss)
+    ss.SetMaxMatches(1)
 
     atom_map = {}
-    unique = True
     t1 = time.time()
-    for count, match in enumerate(mcss.Match(target, unique)):
+    matches = [m for m in ss.Match(molecule)]
+    t2 = time.time()
+    seconds = t2-t1
+    logger().info("Substructure search took {} seconds".format(seconds))
+    if not matches:
+        logger().info("MCSS failed for {}, smiles: {}".format(molecule.GetTitle(), tagged_smiles))
+        return False
+    for match in matches:
         for ma in match.GetAtoms():
             atom_map[ma.pattern.GetMapIdx()] = ma.target.GetIdx()
-    t2 = time.time()
-    logger().info('MCSS took {} seconds'.format(t2-t1))
 
-    m = oechem.OEGraphMol()
-    oechem.OESubsetMol(m, match, True)
-    logger().info("Match SMILES: {}".format(oechem.OEMolToSmiles(m)))
+    # sanity check
+    mol = oechem.OEGraphMol()
+    oechem.OESubsetMol(mol, match, True)
+    logger().info("Match SMILES: {}".format(oechem.OEMolToSmiles(mol)))
+    if molecule is None:
+        return molecule, atom_map
 
-    if molecule:
-        return atom_map
-    else:
-        return atom_map, target
+    return atom_map
 
 
-def to_mapped_xyz(molecule, atom_map, conformer=None):
+def to_mapped_xyz(molecule, atom_map, conformer=None, xyz_format=False, filename=None):
     """
     Generate xyz coordinates for molecule in the order given by the atom_map. atom_map is a dictionary that maps the
     tag on the SMILES to the atom idex in OEMol.
     Parameters
     ----------
-    molecule
-    atom_map
-    conformer
+    molecule: OEMol with conformers
+    atom_map: dict
+        maps tag in SMILES to atom index
+    conformer: int
+        Which conformer to write xyz file for. If None, write out all conformers. Default is None
+    xyz_format: bool
+        If True, will write out number of atoms and molecule name. If false, will only write out elements and coordinates
+    filename: str
+        Name of file to save to. If None, only returns a string.
 
     Returns
     -------
-    str: XYZ file format
+    str: elements and xyz coordinates in order of tagged SMILES
 
     """
-
     xyz = ""
     for k, mol in enumerate(molecule.GetConfs()):
         if k == conformer or conformer is None:
-            xyz += "\n{}".format(mol.GetMaxAtomIdx())
-            xyz += "\n{}".format(mol.GetTitle())
+            if xyz_format:
+                xyz += "{}\n".format(mol.GetMaxAtomIdx())
+                xyz += "{}\n".format(mol.GetTitle())
             coords = oechem.OEFloatArray(mol.GetMaxAtomIdx() * 3)
             mol.GetCoords(coords)
-            for mapping in range(1, len(atom_map)):
+            if k != 0 and not xyz_format:
+                    xyz += "*"
+            for mapping in range(1, len(atom_map)+1):
                 idx = atom_map[mapping]
                 atom = mol.GetAtom(oechem.OEHasAtomIdx(idx))
                 syb = oechem.OEGetAtomicSymbol(atom.GetAtomicNum())
-
-                xyz += "\n  {}      {:05.3f}   {:05.3f}   {:05.3f}".format(syb,
+                xyz += "  {}      {:05.3f}   {:05.3f}   {:05.3f}\n".format(syb,
                                                                            coords[idx * 3],
                                                                            coords[idx * 3 + 1],
                                                                            coords[idx * 3 + 2])
+
+    if filename:
+        file = open("{}.xyz".format(filename, 'w'))
+        file.write(xyz)
+        file.close()
     return xyz
+
+
+# def run_psi4_json(tagged_smiles, molecule, driver, method, basis, properties=None, return_output=True, xyz_file=True):
+#     """
+#
+#     Parameters
+#     ----------
+#     tagged_smiles
+#     xyz
+#     driver
+#     method
+#     basis
+#     properties
+#     return_output
+#     xyz_file
+#
+#     Returns
+#     -------
+#
+#     """
+#     json_data = {}
+#     json_data["tagged_smiles"] = tagged_smiles
+#     json_data["molecule"] = molecule
+#     json_data["driver"] = "property"
+#     json_data["kwargs"] = {"properties": properties}
+#     json_data["method"] = method
+#     json_data["options"] = {"BASIS": basis}
+#     json_data["return_output"] = return_output
+#
+#     name = molecule.split("\n")[2]
+#     if xyz_file:
+#         file = open("{}.xyz".format(name), 'w')
+#         file.write(molecule)
+#         file.close()
+#
+#     j = json.dumb(json_data, indent=4, sort_keys=True)
+#     f = open("{}.input.json".format(name), 'w')
+#     f.close()
+#
+#     psi4.json_wrapper.run_json(json_data)
+#     j = json.dump(json_data, indent=4, sort_keys=True)
+#
+#     f = open("{}.output.json".format(name))
diff --git a/torsionfit/tests/test_qmscan.py b/torsionfit/tests/test_qmscan.py
@@ -96,6 +96,20 @@ def test_atom_map(self):
             atom_2.SetAtomicNum(i+1)
             self.assertEqual(oechem.OECreateCanSmiString(mol_1), oechem.OECreateCanSmiString(mol_2))
 
+    @unittest.skipUnless(has_openeye, "Cannot test without OpenEye")
+    def test_atom_map_order(self):
+        """Test atom map"""
+        from openeye import oechem
+        tagged_smiles = '[H:5][C:1]#[N+:4][C:3]([H:9])([H:10])[C:2]([H:6])([H:7])[H:8]'
+        mol_from_tagged_smiles = openeye.smiles_to_oemol(tagged_smiles)
+        atom_map = utils.get_atom_map(tagged_smiles, mol_from_tagged_smiles)
+
+        # Compare atom map to tag
+        for i in range(1, len(atom_map) +1):
+            atom_1 = mol_from_tagged_smiles.GetAtom(oechem.OEHasAtomIdx(atom_map[i]))
+            self.assertEqual(i, atom_1.GetMapIdx())
+
+
     @unittest.skipUnless(has_openeye, "Cannot test without OpneEye")
     def test_mapped_xyz(self):
         """Test writing out mapped xyz"""
@@ -119,6 +133,6 @@ def test_mapped_xyz(self):
         atom_map_mol2 = {1:0, 2:1, 3:2, 4:3, 5:4, 6:5, 7:6, 8:7, 9:8, 10:9, 11:10, 12:11, 13:12, 14:13, 15:14}
         xyz_2 = utils.to_mapped_xyz(mol_2, atom_map_mol2)
 
-        for ele1, ele2 in zip(xyz_1.split('\n')[3:], xyz_2.split('\n')[3:]):
+        for ele1, ele2 in zip(xyz_1.split('\n')[:-1], xyz_2.split('\n')[:-1]):
             self.assertEqual(ele1.split(' ')[2], ele2.split(' ')[2])