Skip to content

Commit

Permalink
Merge pull request #53 from choderalab/fragment
Browse files Browse the repository at this point in the history
Use OESubSearch to map atoms
  • Loading branch information
ChayaSt committed Mar 23, 2018
2 parents 687fc05 + 5800f34 commit 2c81aca
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 53 deletions.
171 changes: 119 additions & 52 deletions torsionfit/qmscan/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from openeye import oechem, oeiupac, oedepict
import os
import numpy as np
from torsionfit.utils import logger
import time

from openeye import oechem, oeiupac, oedepict
from torsionfit.utils import logger
from openmoltools import openeye


def write_oedatabase(moldb, ofs, mlist, size):
"""
This function writes out a new oedatabase from an existing database
Expand Down Expand Up @@ -84,28 +86,40 @@ def new_output_stream(outname):
return ofs


def to_oemol(filename, title=None):
def to_oemol(filename, title=True, verbose=True):
"""Create OEMol from file. If more than one mol in file, return list of OEMols.
Parameters
----------
filename: str
absolute path to
title: str
title: str, title
title for molecule. If None, IUPAC name will be given as title.
Returns
-------
mollist: list
list of OEMol for multiple molecules. OEMol if file only has one molecule.
"""

if not os.path.exists(filename):
raise Exception("File {} not found".format(filename))
if verbose:
logger().info("Loading molecules from {}".format(filename))

ifs = oechem.oemolistream(filename)
moldb = oechem.OEMolDatabase(ifs)
#moldb = oechem.OEMolDatabase(ifs)
mollist = []

for mol in moldb.GetOEMols():
molecule = oechem.OEMol(mol)
mollist.append(normalize_molecule(molecule, title))
molecule = oechem.OECreateOEGraphMol()
while oechem.OEReadMolecule(ifs, molecule):
molecule_copy = oechem.OEMol(molecule)
if title:
title = molecule_copy.GetTitle()
if verbose:
logger().infor("Reading molecule {}".format(title))

mollist.append(normalize_molecule(molecule_copy, title))

if len(mollist) <= 1:
mollist = mollist[0]
Expand Down Expand Up @@ -149,7 +163,6 @@ def normalize_molecule(molecule, title=None):
# Check for any missing atom names, if found reassign all of them.
if any([atom.GetName() == '' for atom in molcopy.GetAtoms()]):
oechem.OETriposAtomNames(molcopy)

return molcopy


Expand Down Expand Up @@ -294,83 +307,137 @@ def mol_to_tagged_smiles(infile, outfile):

def get_atom_map(tagged_smiles, molecule=None):
"""
Maps tag index in SMILES to atom index in OEMol.
Returns a dictionary that maps tag on SMILES to atom index in molecule.
Parameters
----------
tagged_smiles: str
index-tagged explicit Hydrogen SMILES string
molecule: OEMOl
The OEMol to get the map for. Default is None.
When None, a new OEMol will be generated from SMILES. If an OEMol is provided, the map will be to that OEMol.
index-tagged explicit hydrogen SMILES string
molecule: OEMol
molecule to generate map for. If None, a new OEMol will be generated from the tagged SMILES, the map will map to
this molecule and it will be returned.
Returns
-------
map: dict
maps tag to index in returned OEMol. {map_idx: atom_idx}
molecule: OEMol. Only if no OEMol was provided.
atom_map: dict
a dictionary that maps tag to atom index {tag:idx}
molecule: OEMol
If a molecule was not provided, the generated molecule will be returned.
"""
target = molecule
if not molecule:
target = oechem.OEMol()
oechem.OESmilesToMol(target, tagged_smiles)

# create maximum common substructure object
mcss = oechem.OEMCSSearch(tagged_smiles, oechem.OEMCSType_Approximate)
mcss.SetMaxMatches(1)
mcss.SetMinAtoms(target.GetMaxAtomIdx())
# Scoring function
mcss.SetMCSFunc(oechem.OEMCSMaxAtoms())
if molecule is None:
molecule = openeye.smiles_to_oemol(tagged_smiles)

ss = oechem.OESubSearch(tagged_smiles)
oechem.OEPrepareSearch(molecule, ss)
ss.SetMaxMatches(1)

atom_map = {}
unique = True
t1 = time.time()
for count, match in enumerate(mcss.Match(target, unique)):
matches = [m for m in ss.Match(molecule)]
t2 = time.time()
seconds = t2-t1
logger().info("Substructure search took {} seconds".format(seconds))
if not matches:
logger().info("MCSS failed for {}, smiles: {}".format(molecule.GetTitle(), tagged_smiles))
return False
for match in matches:
for ma in match.GetAtoms():
atom_map[ma.pattern.GetMapIdx()] = ma.target.GetIdx()
t2 = time.time()
logger().info('MCSS took {} seconds'.format(t2-t1))

m = oechem.OEGraphMol()
oechem.OESubsetMol(m, match, True)
logger().info("Match SMILES: {}".format(oechem.OEMolToSmiles(m)))
# sanity check
mol = oechem.OEGraphMol()
oechem.OESubsetMol(mol, match, True)
logger().info("Match SMILES: {}".format(oechem.OEMolToSmiles(mol)))
if molecule is None:
return molecule, atom_map

if molecule:
return atom_map
else:
return atom_map, target
return atom_map


def to_mapped_xyz(molecule, atom_map, conformer=None):
def to_mapped_xyz(molecule, atom_map, conformer=None, xyz_format=False, filename=None):
"""
Generate xyz coordinates for molecule in the order given by the atom_map. atom_map is a dictionary that maps the
tag on the SMILES to the atom idex in OEMol.
Parameters
----------
molecule
atom_map
conformer
molecule: OEMol with conformers
atom_map: dict
maps tag in SMILES to atom index
conformer: int
Which conformer to write xyz file for. If None, write out all conformers. Default is None
xyz_format: bool
If True, will write out number of atoms and molecule name. If false, will only write out elements and coordinates
filename: str
Name of file to save to. If None, only returns a string.
Returns
-------
str: XYZ file format
str: elements and xyz coordinates in order of tagged SMILES
"""

xyz = ""
for k, mol in enumerate(molecule.GetConfs()):
if k == conformer or conformer is None:
xyz += "\n{}".format(mol.GetMaxAtomIdx())
xyz += "\n{}".format(mol.GetTitle())
if xyz_format:
xyz += "{}\n".format(mol.GetMaxAtomIdx())
xyz += "{}\n".format(mol.GetTitle())
coords = oechem.OEFloatArray(mol.GetMaxAtomIdx() * 3)
mol.GetCoords(coords)
for mapping in range(1, len(atom_map)):
if k != 0 and not xyz_format:
xyz += "*"
for mapping in range(1, len(atom_map)+1):
idx = atom_map[mapping]
atom = mol.GetAtom(oechem.OEHasAtomIdx(idx))
syb = oechem.OEGetAtomicSymbol(atom.GetAtomicNum())

xyz += "\n {} {:05.3f} {:05.3f} {:05.3f}".format(syb,
xyz += " {} {:05.3f} {:05.3f} {:05.3f}\n".format(syb,
coords[idx * 3],
coords[idx * 3 + 1],
coords[idx * 3 + 2])

if filename:
file = open("{}.xyz".format(filename, 'w'))
file.write(xyz)
file.close()
return xyz


# def run_psi4_json(tagged_smiles, molecule, driver, method, basis, properties=None, return_output=True, xyz_file=True):
# """
#
# Parameters
# ----------
# tagged_smiles
# xyz
# driver
# method
# basis
# properties
# return_output
# xyz_file
#
# Returns
# -------
#
# """
# json_data = {}
# json_data["tagged_smiles"] = tagged_smiles
# json_data["molecule"] = molecule
# json_data["driver"] = "property"
# json_data["kwargs"] = {"properties": properties}
# json_data["method"] = method
# json_data["options"] = {"BASIS": basis}
# json_data["return_output"] = return_output
#
# name = molecule.split("\n")[2]
# if xyz_file:
# file = open("{}.xyz".format(name), 'w')
# file.write(molecule)
# file.close()
#
# j = json.dumb(json_data, indent=4, sort_keys=True)
# f = open("{}.input.json".format(name), 'w')
# f.close()
#
# psi4.json_wrapper.run_json(json_data)
# j = json.dump(json_data, indent=4, sort_keys=True)
#
# f = open("{}.output.json".format(name))
16 changes: 15 additions & 1 deletion torsionfit/tests/test_qmscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,20 @@ def test_atom_map(self):
atom_2.SetAtomicNum(i+1)
self.assertEqual(oechem.OECreateCanSmiString(mol_1), oechem.OECreateCanSmiString(mol_2))

@unittest.skipUnless(has_openeye, "Cannot test without OpenEye")
def test_atom_map_order(self):
"""Test atom map"""
from openeye import oechem
tagged_smiles = '[H:5][C:1]#[N+:4][C:3]([H:9])([H:10])[C:2]([H:6])([H:7])[H:8]'
mol_from_tagged_smiles = openeye.smiles_to_oemol(tagged_smiles)
atom_map = utils.get_atom_map(tagged_smiles, mol_from_tagged_smiles)

# Compare atom map to tag
for i in range(1, len(atom_map) +1):
atom_1 = mol_from_tagged_smiles.GetAtom(oechem.OEHasAtomIdx(atom_map[i]))
self.assertEqual(i, atom_1.GetMapIdx())


@unittest.skipUnless(has_openeye, "Cannot test without OpneEye")
def test_mapped_xyz(self):
"""Test writing out mapped xyz"""
Expand All @@ -119,6 +133,6 @@ def test_mapped_xyz(self):
atom_map_mol2 = {1:0, 2:1, 3:2, 4:3, 5:4, 6:5, 7:6, 8:7, 9:8, 10:9, 11:10, 12:11, 13:12, 14:13, 15:14}
xyz_2 = utils.to_mapped_xyz(mol_2, atom_map_mol2)

for ele1, ele2 in zip(xyz_1.split('\n')[3:], xyz_2.split('\n')[3:]):
for ele1, ele2 in zip(xyz_1.split('\n')[:-1], xyz_2.split('\n')[:-1]):
self.assertEqual(ele1.split(' ')[2], ele2.split(' ')[2])

0 comments on commit 2c81aca

Please sign in to comment.