In [1]:
from Constraints import MIPMol
from rdkit import Chem
from gurobipy import GRB

# Build a MIPMol model, constrain it, and ask the solver for a batch of molecules;
# the resulting `mols` list is consumed by the screening cell further down.
# set the number of atoms and types of atoms
N = 20
Mol = MIPMol(atoms=["C", "N", "O", "S"], N_atoms=N)

# set the bounds for the number of each type of atom (optional)
# NOTE(review): the four entries presumably line up with the `atoms` list above
# (C, N, O, S) and None presumably means "no bound on this side" -- confirm against
# MIPMol.bounds_atoms. Under that reading, with N = 20: at least 10 C, at most 5 each
# of N, O and S.
lb = [N // 2, None, None, None]
ub = [None, N // 4, N // 4, N // 4]
Mol.bounds_atoms(lb, ub)

# set the bounds for number of double/triple bonds, and rings (optional)
# NOTE(review): arguments look like (lower, upper); if so, an upper bound of 0 on
# rings restricts the search to acyclic molecules -- confirm against MIPMol.
Mol.bounds_double_bonds(None, N // 2)
Mol.bounds_triple_bonds(None, N // 2)
Mol.bounds_rings(None, 0)

# SMARTS patterns passed to exclude_substructures, in the order they are registered:
#   [N,O,S]~[N,O,S]             two N/O/S atoms joined by any bond
#   [N,O,S]~[C,N,O,S]~[N,O,S]   two N/O/S atoms attached to the same central atom
#   [C&H0]                      an aliphatic carbon with no attached hydrogens
Mol.exclude_substructures(["[N,O,S]~[N,O,S]"], "SMARTS")
Mol.exclude_substructures(["[N,O,S]~[C,N,O,S]~[N,O,S]"], "SMARTS")
Mol.exclude_substructures(["[C&H0]"], "SMARTS")
# Disabled: would additionally pass the SMILES fragment CC(C)S to exclude_substructures.
# Mol.exclude_substructures(["CC(C)S"], "SMILES")

# solve() returns three values: the generated molecules and two timings, printed below.
# TODO: TimeLimit currently applies to each batch separately; add a limit on the total run time.
mols, total_time, extract_time = Mol.solve(NumSolutions=1000, TimeLimit=3600, BatchSize=100)

print("Total time =", total_time)
print("Time spent on extracting solutions =", extract_time)

Set parameter Username
Academic license - for non-commercial use only - expires 2025-03-11


  0%|                                                                                           | 0/10 [00:00<?, ?it/s]

Discarded solution information
Reset all parameters


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:03<00:00,  2.89it/s]

Total time = 3.4687767028808594
Time spent on extracting solutions = 0.801180362701416





In [2]:
from typing import Literal, Union
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Descriptors

# Reference fingerprint collection that has_chembl_substruct tests bits against
# (`bit not in fps`). np.load returns an array and .item() unwraps its single
# element into a plain Python object -- presumably a dict or set keyed by Morgan
# bit ids; confirm against the script that wrote the file.
# SECURITY: allow_pickle=True unpickles the file contents, which can execute
# arbitrary code. Only load chembl_fps.npy from a trusted source.
fps = np.load("chembl_fps.npy", allow_pickle=True).item()

# Check whether a molecule contains "weird" substructures (fingerprint bits absent
# from the reference collection) and tally the fragments behind them.
def has_chembl_substruct(mol: Chem.Mol, weird_substructs: dict) -> bool:
    """Tell whether every ECFP2 (Morgan, radius 1) bit of ``mol`` is present in ``fps``.

    ``fps`` is the module-level collection loaded from ``chembl_fps.npy``.
    NOTE(review): per the original docstring, bits missing from it are those seen
    fewer than 5 times in ChEMBL -- the threshold is baked into the file, not
    enforced here; verify against the script that built it.

    Args:
        mol: Molecule to screen.
        weird_substructs: Running tally, mutated in place. For each bit of ``mol``
            that is not in ``fps``, the SMILES of the radius-1 environment around
            the first atom that set the bit is used as key and its count is
            incremented by one.

    Returns:
        True if all bits are in ``fps``; False if at least one is missing. Because
        ``bool`` is an ``int`` subclass, callers may still sum the results.
    """
    fpgen = AllChem.GetMorganGenerator(radius=1)
    ao = AllChem.AdditionalOutput()
    ao.CollectBitInfoMap()
    fp = fpgen.GetSparseCountFingerprint(mol, additionalOutput=ao)
    info = ao.GetBitInfoMap()
    res = True
    for bit in fp.GetNonzeroElements():
        if bit in fps:
            continue
        res = False
        # info[bit] lists the (atom index, radius) pairs that set this bit; only the
        # first occurrence is reported.
        # NOTE(review): the environment is always extracted at radius 1, even when
        # the bit itself was produced at radius 0 -- confirm this is intended.
        idx = info[bit][0][0]
        env = Chem.FindAtomEnvironmentOfRadiusN(mol, 1, idx)
        submol = Chem.PathToSubmol(mol, env)
        smiles = Chem.MolToSmiles(submol)
        weird_substructs[smiles] = weird_substructs.get(smiles, 0) + 1
    return res

# Screen every generated molecule: count those whose fingerprint bits are all known,
# and collect the fragments responsible for the unknown bits.
weird_substructs = {}
cnt = 0
for mol in mols:
    # Refresh RDKit's cached per-atom properties before fingerprinting.
    mol.UpdatePropertyCache()
    if has_chembl_substruct(mol, weird_substructs):
        cnt += 1
print(cnt)

# Rank the offending fragments from most to least frequent; ties keep insertion order.
weird_substructs = sorted(
    weird_substructs.items(),
    key=lambda pair: pair[1],
    reverse=True,
)
print(weird_substructs)

842
[('CC(C)S', 152), ('CC=S', 9), ('CSC', 1)]
