In [1]:
from openeye import oechem, oequacpac

# Check two smiles

On a first pass, I wrote methods to check whether two provided SMILES strings are different microstates or resonance structures of each other. 
Below is a method for getting a list of microstates that are not resonance structures.
- Caitlin Bannan, 2017/11/20

In [2]:
def get_labeled_mol(smiles, label='heavy'):
    """
    Returns an OEMol built from `smiles` whose heavy atoms each carry their
    enumeration index as atom data.

    Parameters
    ----------
    smiles : str
        SMILES string to parse.
    label : str
        Atom-data tag under which each heavy atom's index is stored.

    Returns
    -------
    OEMol, or None (after printing a message) when the SMILES cannot be parsed.
    """
    mol = oechem.OEMol()
    if not oechem.OESmilesToMol(mol, smiles):
        print("Couldn't parse smiles (%s) returning None" % smiles)
        return None

    # Store the index under the caller-supplied tag (previously hard-coded to
    # 'heavy', which silently ignored the `label` argument).
    for idx, atom in enumerate(mol.GetAtomIter(oechem.OEIsHeavy())):
        atom.SetData(label, idx)

    return mol

In [14]:
def find_tautomer(mol1, new_taut):
    """
    Returns a tautomer of mol1 whose canonical SMILES equals the canonical
    SMILES of `new_taut`.

    Parameters
    ----------
    mol1 : OEMol
        Starting molecule; it is copied, not modified.
    new_taut : str
        SMILES string of the tautomer to look for.

    Returns
    -------
    OEMol copy of the matching tautomer (a copy of mol1 itself when it already
    matches), or None when mol1 is None, `new_taut` cannot be parsed, or no
    enumerated tautomer matches.
    """
    if mol1 is None:
        # Nothing to enumerate from (e.g. the caller's SMILES failed to parse)
        return None

    # Canonicalize the target by round-tripping it through an OEMol
    target = oechem.OEMol()
    if not oechem.OESmilesToMol(target, new_taut):
        print("Couldn't parse smiles (%s) returning None" % new_taut)
        return None
    smiles = oechem.OEMolToSmiles(target)

    mol = oechem.OEMol(mol1)
    if smiles == oechem.OEMolToSmiles(mol):
        print("initial mol is the same as the tautomer")
        return mol

    taut_opts = oequacpac.OETautomerOptions()
    taut_opts.SetCarbonHybridization(True)
    taut_opts.SetLevel(5)
    taut_opts.SetMaxSearchTime(60.0)
    taut_opts.SetRankTautomers(True)
    taut_opts.SetSaveStereo(True)

    for taut in oequacpac.OEEnumerateTautomers(mol, taut_opts):
        if oechem.OEMolToSmiles(taut) == smiles:
            # Copy so the result does not depend on the enumeration iterator
            return oechem.OEMol(taut)

    return None

In [4]:
def is_resonance(mol1, mol2, label='heavy'):
    """
    Returns True if the two molecules are resonance structures of each other,
    judged by every heavy atom having the same total hydrogen count in both.

    This assumes that the heavy atoms in mol1 and mol2 have been mapped so that
    identical heavy atoms store the same value under `label` in their atom data.

    Parameters
    ----------
    mol1, mol2 : OEMol
        Molecules whose heavy atoms carry `label` data.
    label : str
        Atom-data tag holding the heavy-atom mapping index.

    Returns
    -------
    bool : False when any mapped heavy atom differs in hydrogen count, or when
    the two molecules do not carry the same set of heavy-atom labels.
    """
    h_counts = dict()
    for a in mol1.GetAtomIter(oechem.OEIsHeavy()):
        h_counts[a.GetData(label)] = a.GetTotalHCount()

    matched = set()
    for a in mol2.GetAtomIter(oechem.OEIsHeavy()):
        key = a.GetData(label)
        # A label unknown to mol1 means the heavy-atom sets differ; report
        # False rather than raising KeyError.
        if key not in h_counts or a.GetTotalHCount() != h_counts[key]:
            return False
        matched.add(key)

    # Every labeled heavy atom of mol1 must have found a partner in mol2
    return len(matched) == len(h_counts)

In [7]:
def test_smiles(smiles1, smiles2):
    """
    Checks whether smiles2 is a resonance structure of smiles1.

    smiles1 is parsed and labeled with get_labeled_mol, the tautomer of it
    matching smiles2 is looked up with find_tautomer, and the pair is compared
    with is_resonance.

    Returns
    -------
    True / False from is_resonance when a matching tautomer is found,
    None when smiles1 cannot be parsed or no matching tautomer is found.
    """
    mol1 = get_labeled_mol(smiles1)
    if mol1 is None:
        # get_labeled_mol has already printed the parse failure; do not hand
        # None on to find_tautomer.
        return None

    mol2 = find_tautomer(mol1, smiles2)
    if mol2 is None:
        print("Could not find tautomer with smiles (%s)" % smiles2)
        return None

    res = is_resonance(mol1, mol2)
    print("These are resonance structures: %s" % res)
    return res

# Example for different microstates

In [8]:
# These are two different microstates.
# The recorded output for this cell is "Could not find tautomer ...", i.e.
# test_smiles returns None because find_tautomer finds no match for taut2.
taut1 = 'c1cc2c(cc1O)c3c(o2)C(=[OH+])NCCC3'
taut2 = 'c1cc2c(cc1[O-])c3c(o2)C(=NCCC3)[O-]'
test_smiles(taut1, taut2)

Could not find tautomer with smiles (c1cc2c(cc1[O-])c3c(o2)C(=NCCC3)[O-])


# Example for same microstate different resonance

In [9]:
# These two are resonance structures of the same microstate:
# res2 is looked up among the tautomers of res1 and then compared atom by atom
# on hydrogen counts (recorded result: True).
res1 = 'c1cc2=[O+]C3=C(NCCCC3=c2cc1O)[O-]'
res2 = 'c1cc2c(cc1O)c3c(o2)C(=O)NCCC3'
test_smiles(res1, res2)

These are resonance structures: True


True

In [10]:
# Resonance check for two SM24 microstate SMILES (recorded result: True)
res1 = "COc1ccc(cc1)c2c-3c([nH]c[nH+]c3[o+]c2c4ccc(cc4)OC)NCCO" #SM24_micro003
res2 = "COc1ccc(cc1)c2c3c([nH+]c[nH+]c3oc2c4ccc(cc4)OC)NCCO" #SM24_micro020
test_smiles(res1, res2)

These are resonance structures: True


True

In [11]:
# Resonance check for two SM24 microstate SMILES (recorded result: True)
res1 = "COc1ccc(cc1)c2c3c(=[NH+]CCO)[nH]cnc3oc2c4ccc(cc4)OC"  # SM24_micro011
# The stray trailing space inside the res2 literal was removed; it was not part
# of the SMILES and would have shown up in any message echoing the string.
res2 = "COc1ccc(cc1)c2c3c(=[NH+]CCO)[nH+]c[n-]c3oc2c4ccc(cc4)OC"  # SM24_micro016
test_smiles(res1, res2)

These are resonance structures: True


True

In [17]:
# resonance: SM03_micro002 vs SM03_micro018 (recorded result: True)
res1 = "c1ccc(cc1)Cc2nnc(s2)N=C(c3cccs3)[O-]"  #SM03_micro002
res2 = "c1ccc(cc1)Cc2[n-][n-]c(=NC(=O)c3cccs3)[s+]2"  #SM03_micro018

test_smiles(res1, res2)

These are resonance structures: True


True

In [18]:
# resonance: SM03_micro007 vs SM03_micro018 (recorded result: True)
# Raw string: the SMILES bond symbol "\" followed by "[" is not a valid Python
# escape sequence, so a plain literal triggers an invalid-escape warning.
res1 = r"c1ccc(cc1)Cc2nnc(s2)/N=C(/c3cccs3)\[O-]"  #SM03_micro007
res2 = "c1ccc(cc1)Cc2[n-][n-]c(=NC(=O)c3cccs3)[s+]2"  #SM03_micro018

test_smiles(res1, res2)

These are resonance structures: True


True

# Example for same microstate different geometric isomers

In [16]:
# Geometric isomers: the recorded output is "Could not find tautomer ...",
# i.e. no tautomer of res1 has the canonical SMILES of res2.
res1 = "c1ccc(cc1)Cc2nnc(s2)N=C(c3cccs3)[O-]"  #SM03_micro002
# Raw string: the SMILES bond symbol "\" followed by "[" is not a valid Python
# escape sequence, so a plain literal triggers an invalid-escape warning.
res2 = r"c1ccc(cc1)Cc2nnc(s2)/N=C(/c3cccs3)\[O-]"  #SM03_micro007
test_smiles(res1, res2)

Could not find tautomer with smiles (c1ccc(cc1)Cc2nnc(s2)/N=C(/c3cccs3)\[O-])


# Find all Microstates

Here's how I would go about finding all the microstates.
However, I only put in some basic tautomer options; they will probably need to be adjusted. 

In [11]:
def find_all_microstates(mol1):
    """
    Enumerates tautomers of mol1 and returns, as a list of OEMol copies, those
    that is_resonance does not consider resonance structures of mol1.

    mol1 itself is left untouched: a copy is made and its heavy atoms are
    tagged with their index under 'heavy' before enumeration.

    NOTE - you will probably want to adjust the tautomer options
    """
    # Work on a labeled copy so the caller's molecule keeps its atom data
    labeled = oechem.OEMol(mol1)
    heavy_atoms = labeled.GetAtomIter(oechem.OEIsHeavy())
    for position, atom in enumerate(heavy_atoms):
        atom.SetData('heavy', position)

    options = oequacpac.OETautomerOptions()
    options.SetCarbonHybridization(True)
    options.SetLevel(5)
    options.SetMaxSearchTime(60.0)
    options.SetRankTautomers(True)
    options.SetSaveStereo(True)

    # Keep a copy of every enumerated tautomer that differs from the labeled
    # molecule in heavy-atom hydrogen counts
    return [
        oechem.OEMol(candidate)
        for candidate in oequacpac.OEEnumerateTautomers(labeled, options)
        if not is_resonance(labeled, candidate)
    ]