In [1]:
## Import necessary packages
import rdkit
from rdkit import Chem
import pandas as pd
import rdkit.Chem.rdMolDescriptors
import rdkit.Chem.rdchem
from rdkit.Chem.Draw import IPythonConsole

In [2]:
## Function that labels each atom in a molecule with its own atom index
def mol_with_atom_index( mol ):
    """Tag every atom of ``mol`` with its index and return the same molecule.

    The index is stored as a string under the 'molAtomMapNumber' property of
    each atom. The molecule is modified in place; the return value is the
    object that was passed in.
    """
    for atom in mol.GetAtoms():
        atom.SetProp( 'molAtomMapNumber', str( atom.GetIdx() ) )
    return mol

In [3]:
## Define the bent structural pattern: two bonded ring atoms, each of which is
## also bonded (any bond type) to an atom that is not in a ring
bent_smarts = "[!R]~[R]~[R]~[!R]"
patt = Chem.MolFromSmarts(bent_smarts)

In [4]:
## Read the import file
## The context manager closes the file handle as soon as pandas has parsed it
with open('SMILES_nothiol.csv', 'r') as smiles_file:
    library = pd.read_csv(smiles_file)

In [5]:
## Create the two (initially empty) result lists:
##   matchList - gains one entry per molecule with a bent substructure match (used for counting)
##   hasCShape - gains one True/False entry per molecule indicating its match status
matchList, hasCShape = [], []

In [6]:
## Iterate through input library to check each molecule for a bent substructure match

## Define amide substructure (compiled once; it is the same for every molecule)
patt2 = Chem.MolFromSmarts("[N]C=O")


def _side_chain_fragments(mol, match):
    """Cut the two bonds extending off the ring and return the two side fragments.

    ``mol`` must already have been labelled with mol_with_atom_index.
    ``match`` is one 4-tuple of atom indices from the bent pattern, ordered
    (non-ring, ring, ring, non-ring).

    Returns a pair (fragment holding match[0], fragment holding match[3]).
    An entry is None if the corresponding fragment could not be located.
    """
    ## Bonds in substructure extending off ring
    cut_bonds = (
        mol.GetBondBetweenAtoms(match[0], match[1]).GetIdx(),
        mol.GetBondBetweenAtoms(match[2], match[3]).GetIdx(),
    )
    fragmentedMol = Chem.FragmentOnBonds(mol, cut_bonds)

    first_side = None
    second_side = None
    for frag in Chem.rdmolops.GetMolFrags(fragmentedMol, asMols = True):
        ## Only atoms labelled by mol_with_atom_index are considered. The dummy
        ## atoms added at each cut carry no label, so GetAtomMapNum() reports 0
        ## for them, which would otherwise be confused with atom index 0.
        atom_ids = {
            atom.GetAtomMapNum()
            for atom in frag.GetAtoms()
            if atom.HasProp('molAtomMapNumber')
        }
        ## Pick the side fragments by the atom they must contain rather than by
        ## position, so unrelated components of the input are never used.
        if match[0] in atom_ids:
            first_side = frag
        elif match[3] in atom_ids:
            second_side = frag
    return first_side, second_side


def _is_ring_free_amide(frag):
    """Return True if the fragment contains an amide and no ring."""
    return frag.HasSubstructMatch(patt2) and Chem.rdMolDescriptors.CalcNumRings(frag) == 0


def _has_aromatic_ring(frag):
    """Return True if the fragment contains at least one aromatic ring."""
    return Chem.rdMolDescriptors.CalcNumAromaticRings(frag) >= 1


## Start from empty result lists so that re-running this cell cannot append a
## second copy of the results (which would no longer line up with the library rows)
matchList.clear()
hasCShape.clear()

for smiles in library['SMILES No Disulfide, Uncorrected']:
    ## Blank cells and SMILES that cannot be parsed cannot match; record them as
    ## non-matches so hasCShape keeps exactly one entry per library row
    mol = Chem.MolFromSmiles(smiles) if isinstance(smiles, str) else None
    if mol is None:
        hasCShape.append(False)
        continue

    ## Index molecule atoms so fragments can be traced back to the match
    mol_with_atom_index(mol)

    ## Search molecule for bent shape and check each hit for the other pattern criteria
    is_bent = False
    for match in mol.GetSubstructMatches(patt):
        frag1, frag2 = _side_chain_fragments(mol, match)
        if frag1 is None or frag2 is None:
            continue

        ## One side must contain an amide and no ring while the other side
        ## contains at least one aromatic ring (in either order)
        if (_is_ring_free_amide(frag1) and _has_aromatic_ring(frag2)) or \
           (_has_aromatic_ring(frag1) and _is_ring_free_amide(frag2)):
            is_bent = True
            ## A single qualifying match is enough for this molecule
            break

    ## Indicate if molecule has passed bent pattern requirements by sorting into lists
    hasCShape.append(is_bent)
    if is_bent:
        matchList.append(True)

In [7]:
## Report how many molecules matched the bent pattern
bent_match_count = len(matchList)
print(bent_match_count)

45


In [8]:
## Add a 'BentMatch' column holding the per-molecule bent pattern match status
library = library.assign(BentMatch=hasCShape)

In [9]:
## Write all input data plus the new column to a csv file (row index omitted)
output_csv = 'BentShapeSubstructureFinalMatches.csv'
library.to_csv(output_csv, index=False)