# How to detect resonance structures?

In [1]:
import pandas as pd
from openeye import oechem, oequacpac

In [49]:
def get_labeled_mol(smiles, label='heavy'):
    """
    Build an OEMol from a SMILES string and tag each heavy atom with an index.

    Parameters
    ----------
    smiles : str
        SMILES string to parse.
    label : str, optional
        Atom-data tag under which the heavy-atom index is stored
        (default 'heavy').

    Returns
    -------
    oechem.OEMol or None
        The parsed molecule, whose heavy atoms carry consecutive integers
        (0, 1, 2, ... in iteration order) under ``label``. None is returned,
        after printing a message, when the SMILES cannot be parsed.
    """
    mol = oechem.OEMol()
    if not oechem.OESmilesToMol(mol, smiles):
        print("Couldn't parse smiles (%s) returning None" % smiles)
        return None

    # Store the index under the caller-supplied tag so that a non-default
    # ``label`` is actually honoured.
    for idx, a in enumerate(mol.GetAtomIter(oechem.OEIsHeavy())):
        a.SetData(label, idx)

    return mol


def find_tautomer(mol1, new_taut):
    """
    Search the tautomers of ``mol1`` for one matching a target SMILES.

    The target SMILES is round-tripped through an OEMol so that it is
    compared in the form produced by ``oechem.OEMolToSmiles``.

    Parameters
    ----------
    mol1 : oechem.OEMol or None
        Starting molecule. It is copied, not modified.
    new_taut : str
        SMILES of the tautomer to look for.

    Returns
    -------
    oechem.OEMol or None
        A copy of ``mol1`` if its SMILES already equals the target, otherwise
        a copy of the first enumerated tautomer whose SMILES equals the
        target. None if ``mol1`` is None, the target SMILES cannot be parsed,
        or no enumerated tautomer matches.
    """
    if mol1 is None:
        return None

    temp_mol = oechem.OEMol()
    if not oechem.OESmilesToMol(temp_mol, new_taut):
        print("Couldn't parse smiles (%s) returning None" % new_taut)
        return None
    smiles = oechem.OEMolToSmiles(temp_mol)

    mol = oechem.OEMol(mol1)
    if smiles == oechem.OEMolToSmiles(mol):
        # Report the SMILES itself: the microstate IDs are not known here.
        print("Initial mol is the same as the tautomer: {}".format(smiles))
        return mol

    taut_opts = oequacpac.OETautomerOptions()
    taut_opts.SetCarbonHybridization(True)
    taut_opts.SetLevel(5)
    taut_opts.SetMaxSearchTime(60.0)  # presumably seconds -- confirm in Quacpac docs
    taut_opts.SetRankTautomers(True)
    taut_opts.SetSaveStereo(True)

    for taut in oequacpac.OEEnumerateTautomers(mol, taut_opts):
        if oechem.OEMolToSmiles(taut) == smiles:
            return oechem.OEMol(taut)

    return None


def is_resonance(mol1, mol2, label='heavy'):
    """
    Decide whether two atom-mapped molecules are resonance structures.

    Both molecules must carry the same heavy-atom mapping, stored in the atom
    data under ``label``. The molecules count as resonance structures when
    every heavy atom of ``mol2`` has the same total hydrogen count as the
    heavy atom of ``mol1`` with the same label value.

    Returns True if all hydrogen counts agree, False at the first mismatch.
    A label value present in ``mol2`` but not in ``mol1`` raises KeyError.
    """
    heavy = oechem.OEIsHeavy

    # Label value -> total hydrogen count, taken from the reference molecule.
    reference_counts = {
        atom.GetData(label): atom.GetTotalHCount()
        for atom in mol1.GetAtomIter(heavy())
    }

    return all(
        atom.GetTotalHCount() == reference_counts[atom.GetData(label)]
        for atom in mol2.GetAtomIter(heavy())
    )


def test_resonance(smiles1, smiles2):
    """
    Returns True if two smiles are resonance structures of the same microstate.
    Returns False if two smiles are determined not to be resonance structures, 
    because their heavy atom hydrogen counts don't match. 
    Returns None if it is undetermined if these two states are resonance 
    structures or not.
    """
    mol1 = get_labeled_mol(smiles1)
    mol2 = find_tautomer(mol1, smiles2)
    if mol2 is not None:
        res = is_resonance(mol1, mol2)
        #print("These are resonance structures: %s" % res)
        return res  
    else:
        #print("Could not find tautomer with smiles (%s)" % smiles2)
        return None
    
def find_resonance_pairs(df):
    """
    Find all pairs of microstates in ``df`` that are resonance structures.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose first column holds the microstate ID and whose second
        column holds the SMILES. Columns are read by position and the index
        is ignored.

    Returns
    -------
    list of tuple
        ``(microstate_ID1, microstate_ID2)`` for every pair of rows, in row
        order with each unordered pair visited once, for which
        ``compare_total_charge`` and ``test_resonance`` are both truthy.
    """
    # Read the two columns once, explicitly by position. Integer keys on a
    # label-indexed row Series (row[1][0]) are deprecated in pandas.
    microstate_ids = df.iloc[:, 0].tolist()
    smiles_list = df.iloc[:, 1].tolist()
    microstates = list(zip(microstate_ids, smiles_list))

    resonance_pairs_list = []

    # Pairwise comparison of microstates
    for i, (microstate_ID1, smiles1) in enumerate(microstates):
        for microstate_ID2, smiles2 in microstates[i + 1:]:
            # Cheap pre-filter: only test resonance if total charge is the same
            if not compare_total_charge(smiles1, smiles2):
                continue
            # test_resonance may return None (undetermined); treated as "no"
            if test_resonance(smiles1, smiles2):
                resonance_pairs_list.append((microstate_ID1, microstate_ID2))

    return resonance_pairs_list
    
    
    
def get_total_charge(smiles):
    """
    Return the total formal charge of the molecule given by a SMILES string.

    The SMILES is parsed into a fresh OEMol and the formal charges of all of
    its atoms are summed. The parse result is not checked.
    """
    parsed = oechem.OEMol()
    oechem.OESmilesToMol(parsed, smiles)
    return sum(atom.GetFormalCharge() for atom in parsed.GetAtomIter())

def compare_total_charge(smiles1, smiles2):
    """
    Return True when both SMILES have the same total formal charge, as
    computed by ``get_total_charge``; otherwise return False.
    """
    return get_total_charge(smiles1) == get_total_charge(smiles2)

In [3]:
# Sanity check on a known case: these two SMILES are resonance structures of
# the same microstate, so test_resonance is expected to return True.
res1 = 'c1cc2=[O+]C3=C(NCCCC3=c2cc1O)[O-]'
res2 = 'c1cc2c(cc1O)c3c(o2)C(=O)NCCC3'
test_resonance(res1, res2)

True

## Compare microstates of SM03 pairwise to eliminate resonance structures

In [4]:
# Load the corrected microstate list of a single molecule.
mol_ID = "SM03"
path_to_input_microstates = "corrections_for_v1_3_1/"
input_file_name = "{}{}_correction.csv".format(path_to_input_microstates, mol_ID)

df_microstates = pd.read_csv(input_file_name)
df_microstates

Unnamed: 0,microstate ID,canonical isomeric SMILES,correction
0,SM03_micro001,c1ccc(cc1)Cc2[nH+]nc(s2)NC(=O)c3cccs3,
1,SM03_micro002,c1ccc(cc1)Cc2nnc(s2)N=C(c3cccs3)[O-],
2,SM03_micro003,c1ccc(cc1)Cc2[n-]nc([s+]2)[N-]C(=O)c3cccs3,deprecated
3,SM03_micro004,c1ccc(cc1)Cc2[nH+]nc(s2)/N=C(/c3cccs3)\[O-],
4,SM03_micro005,c1ccc(cc1)Cc2n[nH+]c(s2)[N-]C(=C3C=CC=[S+]3)[O-],deprecated
5,SM03_micro006,c1ccc(cc1)Cc2[n-][nH]c(=NC(=C3C=CC=[S+]3)[O-])...,deprecated
6,SM03_micro007,c1ccc(cc1)Cc2nnc(s2)/N=C(/c3cccs3)\[O-],deprecated
7,SM03_micro008,c1ccc(cc1)Cc2[n-][nH+]c([s+]2)[N-]C(=O)c3cccs3,deprecated
8,SM03_micro009,c1ccc(cc1)Cc2nnc(s2)[N-]C(=[OH+])c3cccs3,
9,SM03_micro010,c1ccc(cc1)[CH-]c2nnc(s2)NC(=O)c3cccs3,


In [5]:
# Check if there is any deprecated microstate

correction = df_microstates["correction"]
deprecated_boolean = correction.isin(["deprecated"])

# Vectorised check; report once instead of printing one line per row.
deprecated_label = bool(deprecated_boolean.any())

if deprecated_label:
    print("Deprecated microstates found:", int(deprecated_boolean.sum()))
    # Keep every row that is not marked "deprecated" (missing values are kept).
    df_corrected = df_microstates.loc[~deprecated_boolean]
else:
    df_corrected = df_microstates

df_corrected

Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.


Unnamed: 0,microstate ID,canonical isomeric SMILES,correction
0,SM03_micro001,c1ccc(cc1)Cc2[nH+]nc(s2)NC(=O)c3cccs3,
1,SM03_micro002,c1ccc(cc1)Cc2nnc(s2)N=C(c3cccs3)[O-],
3,SM03_micro004,c1ccc(cc1)Cc2[nH+]nc(s2)/N=C(/c3cccs3)\[O-],
8,SM03_micro009,c1ccc(cc1)Cc2nnc(s2)[N-]C(=[OH+])c3cccs3,
9,SM03_micro010,c1ccc(cc1)[CH-]c2nnc(s2)NC(=O)c3cccs3,
10,SM03_micro011,c1ccc(cc1)Cc2nnc(s2)NC(=[OH+])c3cccs3,
11,SM03_micro012,c1ccc(cc1)Cc2nnc(s2)NC(=O)c3cccs3,
12,SM03_micro013,c1ccc(cc1)Cc2n[nH]c(=NC(=O)c3cccs3)s2,
13,SM03_micro014,c1ccc(cc1)[CH-]c2nnc(s2)/N=C(/c3cccs3)\[O-],
14,SM03_micro015,c1ccc(cc1)Cc2[nH+]nc(s2)[N-]C(=[OH+])c3cccs3,


In [6]:
resonance_pairs_list = []

# Read the microstate ID (first column) and SMILES (second column) once,
# explicitly by position. Integer keys on a label-indexed row Series are
# deprecated in pandas.
microstates = list(zip(df_corrected.iloc[:, 0].tolist(),
                       df_corrected.iloc[:, 1].tolist()))

# Pairwise comparison of microstates (each unordered pair is visited once)
for i, (microstate_ID1, smiles1) in enumerate(microstates):
    for microstate_ID2, smiles2 in microstates[i + 1:]:
        if test_resonance(smiles1, smiles2):  # if resonance structures
            resonance_pairs_list.append((microstate_ID1, microstate_ID2))

print("\n")
print(mol_ID, "resonance structures:")
for pair in resonance_pairs_list:
    print(pair)



SM03 resonance structures:


## Compare microstates of all small molecules pairwise to eliminate resonance structures
All small molecules except SM18 are scanned here. SM18 has 74 microstates, so it needs a quicker method than a full pairwise comparison (see the next section).

In [7]:
path_to_input_microstates = "corrections_for_v1_3_1/"

for m in range(24):
    mol_ID = "SM" + str(m + 1).zfill(2)

    # Skip SM18
    if mol_ID == "SM18":
        continue

    print(mol_ID, "...")

    input_file_name = path_to_input_microstates + mol_ID + "_correction.csv"
    df_microstates = pd.read_csv(input_file_name)

    # Check if there is any deprecated microstate and create corrected
    # microstates list accordingly. The check is vectorised and reported once
    # per molecule instead of once per deprecated row.
    correction = df_microstates["correction"]
    deprecated_boolean = correction.isin(["deprecated"])
    deprecated_label = bool(deprecated_boolean.any())

    if deprecated_label:
        print("Deprecated microstates found:", int(deprecated_boolean.sum()))
        df_corrected = df_microstates.loc[~deprecated_boolean]
    else:
        df_corrected = df_microstates

    # Check for resonance structure pairs in corrected list.
    # Read the microstate ID (first column) and SMILES (second column) once,
    # explicitly by position. Integer keys on a label-indexed row Series are
    # deprecated in pandas.
    microstates = list(zip(df_corrected.iloc[:, 0].tolist(),
                           df_corrected.iloc[:, 1].tolist()))

    resonance_pairs_list = []

    # Pairwise comparison of microstates (each unordered pair is visited once)
    for i, (microstate_ID1, smiles1) in enumerate(microstates):
        for microstate_ID2, smiles2 in microstates[i + 1:]:
            # if total charge is the same test resonance
            if not compare_total_charge(smiles1, smiles2):
                continue
            if test_resonance(smiles1, smiles2):  # if resonance structures
                resonance_pairs_list.append((microstate_ID1, microstate_ID2))

    print(mol_ID, "resonance structures:")
    for pair in resonance_pairs_list:
        print(pair)
    print("\n")

SM01 ...
Deprecated microstate found.
Deprecated microstate found.
SM01 resonance structures:


SM02 ...
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
SM02 resonance structures:


SM03 ...
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
SM03 resonance structures:


SM04 ...
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
SM04 resonance structures:


SM05 ...
Deprecated microstate found.
Deprecated microstate found.
SM05 resonance structures:


SM06 ...
SM06 resonance structures:


SM07 ...
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
Deprecated microstate found.
SM07 resonance structures:


SM08 ...


## Detecting resonance structures of SM18

Only structures with the same total charge can possibly be resonance structures.
I will split the SM18 microstates into sets according to their total charge and then do a pairwise comparison within each set.

In [30]:
mol_ID = "SM18"
path_to_input_microstates = "corrections_for_v1_3_1/"
input_file_name = path_to_input_microstates + mol_ID + "_correction.csv"

df_microstates = pd.read_csv(input_file_name)

# Check if there is any deprecated microstate

correction = df_microstates["correction"]
deprecated_boolean = correction.isin(["deprecated"])
deprecated_label = bool(deprecated_boolean.any())

if deprecated_label:
    print("Deprecated microstates found:", int(deprecated_boolean.sum()))

# Always work on a re-indexed copy, so that adding the column below never
# modifies df_microstates, whether or not any row was dropped.
df_corrected = df_microstates.loc[~deprecated_boolean].reset_index(drop=True)

# Total formal charge of every microstate, computed from its SMILES
# (second column) in one pass.
df_corrected["total charge"] = df_corrected.iloc[:, 1].map(get_total_charge)

df_corrected

Deprecated microstate found.
Deprecated microstate found.


Unnamed: 0,microstate ID,canonical isomeric SMILES,correction,total charge
0,SM18_micro001,c1ccc2c(c1)c(=O)[nH]c(n2)CCC(=O)/[NH+]=C/3\N=C...,,1
1,SM18_micro002,c1ccc2c(c1)c(=O)[n-]c([nH+]2)CCC(=O)Nc3ncc(s3)...,,0
2,SM18_micro003,c1ccc2c(c1)c(=O)nc([nH]2)C/C=C(/Nc3[nH+]cc(s3)...,,0
3,SM18_micro004,c1ccc2c(c1)c(=O)nc([nH]2)CCC(=O)[NH+]=c3[nH]cc...,,1
4,SM18_micro005,c1ccc2c(c1)c(=O)nc([nH]2)CC/C(=N/c3ncc(s3)Cc4c...,,-1
5,SM18_micro006,c1ccc2c(c1)c(=O)[n-]c([nH+]2)CCC(=O)[NH+]=c3[n...,,1
6,SM18_micro007,c1ccc2c(c1)c(=O)[nH+]c([nH]2)C/C=C(/Nc3ncc(s3)...,,0
7,SM18_micro008,c1ccc2c(c1)c(=O)nc([nH]2)CCC(=O)[N-]c3[n-]cc([...,,-1
8,SM18_micro009,c1ccc2c(c1)c(=O)nc([nH]2)CCC(=O)/N=C\3/N=CC(S3...,,0
9,SM18_micro010,c1ccc2c(c1)c(=O)nc([nH]2)CCC(=O)/N=C/3\[NH+]=C...,,1


In [31]:
# Range of total charges present among the SM18 microstates.
total_charges = df_corrected["total charge"]
max_charge, min_charge = total_charges.max(), total_charges.min()
print("Max total charge: ", max_charge)
print("Min total charge: ", min_charge)

Max total charge:  2
Min total charge:  -2


In [51]:
# SM18 microstates with total charge -2
df_SM18_minus2 = df_corrected[df_corrected["total charge"] == -2]
print("Number of microstates: ", len(df_SM18_minus2))

caught_resonance_pairs = find_resonance_pairs(df_SM18_minus2)
for pair in caught_resonance_pairs:
    # Each pair is followed by two blank lines.
    print(pair, end="\n\n\n")

Number of microstates:  4


In [54]:
# SM18 microstates with total charge -1
df_SM18_minus1 = df_corrected[df_corrected["total charge"] == -1]
print("Number of microstates: ", len(df_SM18_minus1))

caught_resonance_pairs = find_resonance_pairs(df_SM18_minus1)
for pair in caught_resonance_pairs:
    # Each pair is followed by two blank lines.
    print(pair, end="\n\n\n")

Number of microstates:  16
('SM18_micro008', 'SM18_micro023')


('SM18_micro008', 'SM18_micro024')


('SM18_micro008', 'SM18_micro036')


('SM18_micro023', 'SM18_micro024')


('SM18_micro023', 'SM18_micro036')


('SM18_micro024', 'SM18_micro036')




In [56]:
# SM18 microstates with total charge 0
df_SM18_0 = df_corrected[df_corrected["total charge"] == 0]
print("Number of microstates: ", len(df_SM18_0))

caught_resonance_pairs = find_resonance_pairs(df_SM18_0)
for resonance_pair in caught_resonance_pairs:
    print(resonance_pair)

Number of microstates:  28
('SM18_micro002', 'SM18_micro018')
('SM18_micro002', 'SM18_micro022')
('SM18_micro018', 'SM18_micro022')


In [57]:
# SM18 microstates with total charge +1
df_SM18_plus1 = df_corrected[df_corrected["total charge"] == 1]
print("Number of microstates: ", len(df_SM18_plus1))

caught_resonance_pairs = find_resonance_pairs(df_SM18_plus1)
for resonance_pair in caught_resonance_pairs:
    print(resonance_pair)

Number of microstates:  21
('SM18_micro004', 'SM18_micro006')
('SM18_micro004', 'SM18_micro014')
('SM18_micro006', 'SM18_micro014')


In [58]:
# SM18 microstates with total charge +2
df_SM18_plus2 = df_corrected[df_corrected["total charge"] == 2]
print("Number of microstates: ", len(df_SM18_plus2))

caught_resonance_pairs = find_resonance_pairs(df_SM18_plus2)
for resonance_pair in caught_resonance_pairs:
    print(resonance_pair)

Number of microstates:  3
