## Searching for good test data

### Config

In [None]:
# Size threshold for the groups of molecules handed to the MCS search;
# read by generate_sublists when it picks the shortest slice to emit.
min_molecules_in_mcs = 4

## Helper functions

In [None]:
from rdkit.Chem import MolFromSmiles, MolFromInchi, Mol
from typing import Generator, TypeVar
from rdkit.Chem.rdFMCS import MCSResult,FindMCS
from rdkit.Chem.Draw import MolToImage

In [None]:
A = TypeVar("A")


def generate_sublists(
    input_list: list[A], min_size: "int | None" = None
) -> Generator[list[A], None, None]:
    """Yield every contiguous slice of ``input_list`` holding at least ``min_size`` items.

    Slices are produced in order of start index, and for each start index in
    order of increasing length. Only contiguous runs are generated, not
    arbitrary subsets.

    Args:
        input_list: The list to slice.
        min_size: Smallest slice length to emit. When omitted, the
            module-level ``min_molecules_in_mcs`` setting is used.

    Yields:
        New lists ``input_list[i:j]`` with ``j - i >= min_size``.
    """
    if min_size is None:
        # Looked up at call time so that re-running the config cell takes effect.
        min_size = min_molecules_in_mcs

    n = len(input_list)
    for i in range(n):
        # j is the exclusive end index, so the slice holds j - i items.
        # Starting at i + min_size makes the shortest slice exactly min_size
        # long (the previous "+ 1" silently raised the minimum by one).
        for j in range(i + min_size, n + 1):
            yield input_list[i:j]

In [None]:
def print_mols(mols: dict[str, Mol]):
    """Print each molecule's name followed by a rendered image of it.

    Args:
        mols: Mapping from a display name to the molecule to draw.
    """
    # NOTE(review): `display` is not imported in the visible cells; presumably
    # the IPython/Jupyter builtin - confirm when running outside a notebook.
    for label in mols:
        print(label)
        picture = MolToImage(mols[label])
        display(picture)

In [None]:
def get_name(mol: Mol) -> str:
    """Return the value stored in the molecule's ``"name"`` property.

    The property is set by ``enumerate_mcs`` before it reads it back here.
    """
    name = mol.GetProp("name")
    return name

def enumerate_mcs(mols: dict[str, Mol]) -> Generator[tuple[list[str], MCSResult], None, None]:
    """Run an MCS search on every sublist of molecules from ``generate_sublists``.

    Side effect: every molecule in ``mols`` gets its dictionary key written to
    its ``"name"`` property.

    Args:
        mols: Mapping from a name to a molecule.

    Yields:
        ``(names, result)`` pairs, where ``names`` is the list of names of the
        molecules in the sublist and ``result`` is what ``FindMCS`` returned
        for that sublist.
    """
    # Tag each molecule with its key so the name travels with the Mol object
    # through slicing and can be read back with get_name.
    for key, mol in mols.items():
        mol.SetProp("name", key)

    for sub in generate_sublists(list(mols.values())):
        result = FindMCS(sub)

        # Build a real list: a bare map() is a one-shot iterator that does not
        # match the declared list[str] and is empty after its first use.
        yield [get_name(mol) for mol in sub], result


In [None]:
def print_mcs_matches(mols: dict[str, Mol]):
    """Print the names in each group from ``enumerate_mcs`` and show its MCS query.

    For every ``(names, result)`` pair, the names are printed as one
    comma-separated line and ``result.queryMol`` is passed to ``display``.

    Args:
        mols: Mapping from a name to a molecule, forwarded to ``enumerate_mcs``.
    """
    for group_names, mcs in enumerate_mcs(mols):
        header = ','.join(group_names)
        print(header)
        display(mcs.queryMol)


## Datasets

### Platelet-activating factor (PAF) antagonists

From <https://en.wikipedia.org/wiki/Platelet-activating_factor>:

In [None]:
# Named molecules for this dataset, each parsed from its InChI string.
# Declared with a variable annotation; the previous separate
# `PARf_antagonists = dict[str, Mol]` line only bound the name to the type
# itself and was immediately overwritten.
# NOTE(review): the key "afafant" looks like a misspelling of "apafant"; it is
# left unchanged because it is a runtime key/label - confirm before renaming.
PARf_antagonists: dict[str, Mol] = {
    "afafant": MolFromInchi("InChI=1S/C22H22ClN5O2S/c1-14-25-26-19-13-24-21(16-4-2-3-5-18(16)23)17-12-15(31-22(17)28(14)19)6-7-20(29)27-8-10-30-11-9-27/h2-5,12H,6-11,13H2,1H3"),
    "israpafant": MolFromInchi("InChI=1S/C28H29ClN4S/c1-17(2)15-21-11-9-20(10-12-21)13-14-22-16-24-26(23-7-5-6-8-25(23)29)30-18(3)27-32-31-19(4)33(27)28(24)34-22/h5-12,16-18H,13-15H2,1-4H3/t18-/m1/s1"),
    "lexipafant": MolFromInchi("InChI=1S/C23H30N4O4S/c1-6-31-23(28)22(13-16(2)3)26(5)32(29,30)19-9-7-18(8-10-19)15-27-17(4)25-20-14-24-12-11-21(20)27/h7-12,14,16,22H,6,13,15H2,1-5H3/t22-/m0/s1"),
    "rupatadine": MolFromInchi("InChI=1S/C26H26ClN3/c1-18-13-19(16-28-15-18)17-30-11-8-20(9-12-30)25-24-7-6-23(27)14-22(24)5-4-21-3-2-10-29-26(21)25/h2-3,6-7,10,13-16H,4-5,8-9,11-12,17H2,1H3"),
    "modipafant": MolFromInchi("InChI=1S/C34H29ClN6O3/c1-4-44-34(43)31-30(24-9-5-6-10-25(24)35)29(33(42)40-28-11-7-8-17-37-28)20(2)38-32(31)22-12-14-23(15-13-22)41-21(3)39-26-19-36-18-16-27(26)41/h5-19,30,38H,4H2,1-3H3,(H,37,40,42)/t30-/m1/s1"),
    "etizolam": MolFromInchi("InChI=1S/C17H15ClN4S/c1-3-11-8-13-16(12-6-4-5-7-14(12)18)19-9-15-21-20-10(2)22(15)17(13)23-11/h4-8H,3,9H2,1-2H3"),
    "sm-12502": MolFromInchi("InChI=1S/C10H12N2OS.ClH/c1-7-9(13)12(2)10(14-7)8-4-3-5-11-6-8;/h3-7,10H,1-2H3;1H/t7-,10+;/m0./s1"),
    "cv-3988": MolFromInchi("InChI=1S/C28H53N2O7PS/c1-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-18-19-29-28(31)35-24-27(34-2)25-37-38(32,33)36-22-20-30-21-23-39-26-30/h21,23,26-27H,3-20,22,24-25H2,1-2H3,(H-,29,31,32,33)"),
}

In [None]:
# Print each antagonist's name and render its structure image.
print_mols(PARf_antagonists)

In [None]:
# List the MCS result for every group of antagonists produced by enumerate_mcs.
print_mcs_matches(PARf_antagonists)