## Selection

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('dark_background')
from rdkit.Chem import AllChem as Chem

# Load the plate layout table; the first CSV column becomes the row index.
df = pd.read_csv('layouts.csv', index_col=0)

# Quick sanity check: row count and the available columns.
print(df.shape[0], df.columns)

# Preview the first five rows as the cell output.
df.head(5)

978 Index(['Item Name', 'CatalogNumber', 'SMILES', 'Rack Number',
       'Plate Location'],
      dtype='object')


Unnamed: 0,Item Name,CatalogNumber,SMILES,Rack Number,Plate Location
0,Axitinib,S1005,CNC(=O)C1=CC=CC=C1SC1=CC2=C(C=C1)C(\C=C\C1=CC=...,L1300-01,a1
1,Afatinib (BIBW2992),S1011,CN(C)C\C=C\C(=O)NC1=CC2=C(NC3=CC=C(F)C(Cl)=C3)...,L1300-01,b1
2,Bortezomib (Velcade),S1013,CC(C)C[C@H](NC(=O)[C@H](CC1=CC=CC=C1)NC(=O)C1=...,L1300-01,c1
3,Bosutinib (SKI-606),S1014,COC1=C(Cl)C=C(Cl)C(NC2=C(C=NC3=CC(OCCCN4CCN(C)...,L1300-01,d1
4,Dasatinib (BMS-354825),S1021,CC1=NC(NC2=NC=C(S2)C(=O)NC2=C(Cl)C=CC=C2C)=CC(...,L1300-01,e1


In [2]:
# Parse each SMILES string into an RDKit molecule and keep it next to its source row.
df['mols'] = [Chem.MolFromSmiles(smiles) for smiles in df.SMILES]

# MolFromSmiles returns None for SMILES it cannot parse; fail here with the
# offending compound names rather than with an opaque error inside the
# fingerprint call below.
unparsed_names = [name for name, mol in zip(df['Item Name'], df['mols']) if mol is None]
if unparsed_names:
    raise ValueError(f'Could not parse SMILES for: {unparsed_names}')

# Compute the RDKit fingerprints once; `fps_` keeps the bit-vector objects and
# `fps` is the same data as a 2-D array (one row per compound).
fps_ = [Chem.RDKFingerprint(mol) for mol in df['mols']]
fps = np.array(fps_)
print(fps.shape)
fps

(978, 2048)


array([[1, 1, 0, ..., 1, 0, 1],
       [1, 0, 1, ..., 0, 0, 1],
       [1, 1, 0, ..., 0, 1, 1],
       ...,
       [1, 1, 1, ..., 0, 1, 1],
       [1, 0, 1, ..., 0, 0, 1],
       [1, 0, 1, ..., 0, 0, 1]])

In [3]:
import rdkit
from rdkit import SimDivFilters

from rdkit.SimDivFilters.SimilarityPickers import SpreadPicker

# Probe compound whose fingerprint is handed to the picker as the single probe.
mesotrione = Chem.MolFromSmiles('CS(=O)(=O)C1=CC(=C(C=C1)C(=O)C2C(=O)CCCC2=O)[N+](=O)[O-]')
mesotrione_fp = Chem.RDKFingerprint(mesotrione)

# Fresh fingerprint objects for the library, so the tags set below start clean
# every time this cell is re-run.
fps_ = [Chem.RDKFingerprint(mol) for mol in df['mols']]

# Tag each fingerprint so a pick can be traced back to its row:
#   _id  - the item name (kept for anything that still reads it)
#   _row - the row position in `df`; unlike the name, this is always unique,
#          so duplicated item names cannot pull in rows that were not picked.
for row_position, (fp, item_name) in enumerate(zip(fps_, df['Item Name'])):
    fp._id = item_name
    fp._row = row_position

picker = SpreadPicker(numToPick=96,
                      dataSet=fps_,
                      probeFps=[mesotrione_fp])

# Iterating the picker yields pairs whose first element is one of the tagged
# fingerprints; the second element is not needed here.
# Sorting the positions keeps the selection in the same row order as `df`.
picked_rows = sorted(fp._row for fp, _ in picker)
selection = df.iloc[picked_rows, :]
selection

Unnamed: 0,Item Name,CatalogNumber,SMILES,Rack Number,Plate Location,mols
8,Lapatinib Ditosylate (Tykerb),S1028,CC1=CC=C(C=C1)S(O)(=O)=O.CC1=CC=C(C=C1)S(O)(=O...,L1300-01,a2,<rdkit.Chem.rdchem.Mol object at 0x7fb4bc745620>
10,Nilotinib (AMN-107),S1033,CC1=CN(C=N1)C1=CC(NC(=O)C2=CC=C(C)C(NC3=NC=CC(...,L1300-01,c2,<rdkit.Chem.rdchem.Mol object at 0x7fb4bc7456c0>
11,Pazopanib HCl,S1035,Cl.CN(C1=CC2=NN(C)C(C)=C2C=C1)C1=NC(NC2=CC=C(C...,L1300-01,d2,<rdkit.Chem.rdchem.Mol object at 0x7fb4bc745710>
25,Docetaxel (Taxotere),S1148,CC(=O)O[C@@]12CO[C@@H]1C[C@H](O)[C@]1(C)C2[C@H...,L1300-01,b4,<rdkit.Chem.rdchem.Mol object at 0x7fb4bc745bc0>
26,Paclitaxel (Taxol),S1150,CC(=O)O[C@@H]1C2=C(C)[C@H](C[C@@](O)([C@@H](OC...,L1300-01,c4,<rdkit.Chem.rdchem.Mol object at 0x7fb4bc745c10>
...,...,...,...,...,...,...
931,Nalmefene HCl,S4341,Cl.[H][C@@]12OC3=C(O)C=CC4=C3[C@@]11CCN(CC3CC3...,L1300-11,d7,<rdkit.Chem.rdchem.Mol object at 0x7fb4bc759e90>
944,Noscapine HCl,S4363,Cl.[H][C@@]1(OC(=O)C2=C1C=CC(OC)=C2OC)[C@]1([H...,L1300-11,a9,<rdkit.Chem.rdchem.Mol object at 0x7fb4bc75a300>
962,Brucine,S4390,O.OS(O)(=O)=O.[H][C@]12C[C@@]3([H])C4=CCOC5CC(...,L1300-11,c11,<rdkit.Chem.rdchem.Mol object at 0x7fb4bc75a8a0>
966,Isoxicam,S4398,CN1C(C(=O)NC2=NOC(C)=C2)=C(O)C2=CC=CC=C2S1(=O)...,L1300-11,g11,<rdkit.Chem.rdchem.Mol object at 0x7fb4bc75a9e0>
