In [206]:
from rdkit import Chem

def delete_substructs(full_mol, matched_atom_maps):
    """Separate the detached fragment identified by its atom-map numbers.

    ``full_mol`` is written to SMILES and split on ``"."`` into its
    disconnected fragments. The fragment whose set of atom-map numbers equals
    ``matched_atom_maps`` is returned on its own; every other fragment is
    re-joined into a single editable molecule.

    Args:
        full_mol: RDKit molecule in which the substructure has already been
            disconnected from the rest (i.e. it forms its own fragment).
        matched_atom_maps: iterable of the atom-map numbers carried by the
            atoms of the substructure to extract.

    Returns:
        Tuple ``(filtered_mol, substruct_mol)``: an ``RWMol`` holding all
        remaining fragments, and a ``Mol`` holding the extracted fragment.

    Raises:
        ValueError: if a fragment cannot be re-parsed from its SMILES, or if
            no fragment carries exactly ``matched_atom_maps``.
    """
    # Accept any iterable; a list/tuple would otherwise never compare equal
    # to the per-fragment set built below.
    matched_atom_maps = set(matched_atom_maps)

    kept_smiles = []
    substruct_smiles = None

    for frag_smiles in Chem.MolToSmiles(full_mol).split("."):
        frag_mol = Chem.MolFromSmiles(frag_smiles)
        if frag_mol is None:
            # MolFromSmiles signals a parse/sanitization failure by returning None.
            raise ValueError(f"could not parse fragment SMILES: {frag_smiles!r}")

        frag_maps = {atom.GetAtomMapNum() for atom in frag_mol.GetAtoms()}
        if frag_maps == matched_atom_maps:
            substruct_smiles = frag_smiles
        else:
            kept_smiles.append(frag_smiles)

    if substruct_smiles is None:
        # Without this guard MolFromSmiles(None) fails with an opaque
        # argument-type error that hides the real cause.
        raise ValueError(
            f"no detached fragment has atom maps {sorted(matched_atom_maps)}"
        )

    filtered_mol = Chem.RWMol(Chem.MolFromSmiles('.'.join(kept_smiles)))
    substruct_mol = Chem.MolFromSmiles(substruct_smiles)
    return filtered_mol, substruct_mol


In [211]:
#! TESTING
# Demo: for each SMARTS pattern, repeatedly cut the first match out of the
# molecule and put a mapped placeholder ('*') atom in its place, recording the
# removed fragment and the bonds that were cut so the edit can be undone later.
START_PLACEHOLDER_MAP_FROM = 999
curr_placeholder_map_num = START_PLACEHOLDER_MAP_FROM

mol_smiles = "O=COOC=O"
# mol_smiles = "O=Cc2cccc(c1ccc(Cl)cc1)c2"
mol = Chem.MolFromSmiles(mol_smiles)
mol = Chem.RWMol(mol) # make editable

# (neighbor atom map, substructure atom map) for every bond cut so far
all_bonds_removed_map2map = []
# placeholder atom map number -> Mol of the substructure it replaced
placeholder_to_substruct_mol = dict()

# add some atom map indices
for i, atom in enumerate(mol.GetAtoms()):
    atom.SetAtomMapNum(i+1)

all_maccs_smarts = ['O=CO', 'N=CO', 'c1ccccc1'] # define
for maccs_smart in all_maccs_smarts:

    pattern = Chem.MolFromSmarts(maccs_smart)
    match_tuple = mol.GetSubstructMatch(pattern)

    while match_tuple:

        bonds_to_remove = []
        # neighbor atom map -> bond type to recreate towards the placeholder
        bonds_to_add_back = dict()
        # neighbor atom map -> explicit Hs to take away again when that bond is recreated
        hs_to_take_back = dict()
        substruct_found = True  # not read anywhere in this cell

        matched_atom_maps = set()
        matched_atom_ids = set(match_tuple)
        bonds_removed = []

        for subs_atom_id in match_tuple:
            subs_atom = mol.GetAtomWithIdx(subs_atom_id)
            matched_atom_maps.add(subs_atom.GetAtomMapNum())

            for nbr_atom in subs_atom.GetNeighbors():

                # bonds inside the match stay with the fragment
                if nbr_atom.GetIdx() in matched_atom_ids:
                    continue

                bond = mol.GetBondBetweenAtoms(nbr_atom.GetIdx(), subs_atom_id)
                bond_type = bond.GetBondType()
                # Number of Hs that stand in for the cut bond. int(bond_type)
                # is the enum's ordinal, which equals the bond order only for
                # SINGLE/DOUBLE/TRIPLE (AROMATIC is 12), so derive it from the
                # numeric bond order instead (1.5 for aromatic -> 1).
                # NOTE(review): fragments left with non-ring aromatic atoms may
                # still fail the SMILES round trip in delete_substructs.
                num_hs = int(bond.GetBondTypeAsDouble())

                bonds_to_remove.append((nbr_atom, subs_atom_id, num_hs))
                bonds_to_add_back[nbr_atom.GetAtomMapNum()] = bond_type
                hs_to_take_back[nbr_atom.GetAtomMapNum()] = num_hs
                bonds_removed.append((nbr_atom.GetAtomMapNum(), subs_atom.GetAtomMapNum()))

        # disconnect substructure from neighbors, capping each cut end with explicit Hs
        for (nbr_atom, subs_atom_id, num_hs) in bonds_to_remove:
            explicit_Hs = nbr_atom.GetNumExplicitHs()
            mol.RemoveBond(nbr_atom.GetIdx(), subs_atom_id)
            nbr_atom.SetNumExplicitHs(explicit_Hs + num_hs)

        # deleting substructure match
        mol, substruct_mol = delete_substructs(mol, matched_atom_maps)

        # for reconstruction
        all_bonds_removed_map2map.extend(bonds_removed)
        placeholder_to_substruct_mol[curr_placeholder_map_num] = substruct_mol

        # replacing substructure with placeholder
        placeholder_atom = Chem.Atom('*')
        placeholder_atom.SetAtomMapNum(curr_placeholder_map_num)
        curr_placeholder_map_num += 1
        placeholder_atom_id = mol.AddAtom(placeholder_atom)

        # delete_substructs rebuilds the molecule from SMILES, so atom indices
        # may differ from before but atom map numbers are preserved; hence the
        # neighbors are located by atom map number (as used in bonds_to_add_back)
        for atom in mol.GetAtoms():
            atom_map = atom.GetAtomMapNum()
            if atom_map in bonds_to_add_back:
                explicit_Hs = atom.GetNumExplicitHs()
                mol.AddBond(placeholder_atom_id, atom.GetIdx(), bonds_to_add_back[atom_map])
                # give back the Hs that were added when the bond was cut
                atom.SetNumExplicitHs(explicit_Hs - hs_to_take_back[atom_map])

        match_tuple = mol.GetSubstructMatch(pattern)
        # Possible improvement: use GetSubstructMatches() and do one
        # deletion for each match of the current maccs_smart.

print(Chem.MolToSmiles(mol))

[*:999][*:1000]


In [210]:
# Display the placeholder-map-number -> removed-substructure Mol lookup filled in by the fragmentation loop.
placeholder_to_substruct_mol

{999: <rdkit.Chem.rdchem.Mol at 0x199487e10d0>,
 1000: <rdkit.Chem.rdchem.Mol at 0x1994860f670>}