# Organizing Final Molecule Lists for Ordering
Selected molecules are matched to their Supplier Name and Catalog Number using the eMolecules search results. Both the fragment-like and drug-like sets are then sorted by vendor for ease of ordering.

In [1]:
import pandas as pd

In [2]:
# Read the eMolecules search-result export; later cells look up
# SUPPLIER_NAME and CATALOG_NUMBER in it by EMOLECULES_ID.
df_eMol = pd.read_csv(
    filepath_or_buffer="Quote-Cart_searched_with_isoSMILES_tier1_100mg.csv"
)
df_eMol.head(n=5)

Unnamed: 0,eMolecules_Order_Number,NAME,PO_Number,ORIG_ROWNUM,MATCH_TYPE,MWT,SMILES,SUPPLIER_NAME,CATALOG_NUMBER,SHIPPED_AMOUNT,SHIPPED_UNITS,BARCODE,PLATE_BARCODE,LOCATION,Text 1,EMOLECULES_ID
0,350033304,Mehtap Isik,,1,exact,237.26,c1ccc(cc1)Nc1nnc(o1)c1ccccc1,Enamine Screening Compounds,Z223448766,,mg,,,,,37308
1,350033304,Mehtap Isik,,2,exact,270.24,Oc1ccc(cc1)c1cc(=O)c2c(o1)cc(cc2O)O,Vitas M Labs,STK801630,,mg,,,,,478202
2,350033304,Mehtap Isik,,3,exact,130.15,c1ccc2c(c1)ncnc2,Maybridge,AC15398,,mg,,,,,479644
3,350033304,Mehtap Isik,,4,exact,76.12,NC(=S)N,Vitas M Labs,STL194300,,mg,,,,,484431
4,350033304,Mehtap Isik,,5,exact,234.2,COc1ccc2c(c1)oc(=O)cc2CC(=O)O,Enamine Screening Compounds,Z276516410,,mg,,,,,490595


### Organize fragment-like set

In [3]:
# Read the final fragment-like selection; each row carries an "eMolecules ID"
# that is used below to look up vendor details.
df_frag = pd.read_csv(filepath_or_buffer="df_frag_final.csv")
df_frag.head(n=5)

Unnamed: 0,eMolecules ID,canonical isomeric SMILES,eMolecules SMILES,"pKas in [3,11]",XlogP,MolWt,Availability (mg),Price,group,N_Rot,N_UV_chrom,Selection,Bin index,Priority,Final list
0,6679830,c1cc2c(cc1O)c3c(o2)C(=O)NCCC3,Oc1cc2c3CCCNC(=O)c3oc2cc1,[9.119],0.72,217.221,184.0,533.0,fragment-like,0,27,picked,4,1,True
1,719540,c1ccc(cc1)n2c3c(cn2)c(ncn3)N,Nc1ncnc2c1cnn2c1ccccc1,[3.869],1.499,211.223,3430.0,414.0,fragment-like,1,31,picked,8,1,True
2,37095168,c1ccc2c(c1)ncn2c3ccc(cc3)O,Oc1ccc(cc1)n1cnc2c1cccc2,"[5.82, 8.709]",2.219,210.231,21650.2,148.0,fragment-like,1,40,picked,11,1,True
3,37053191,c1ccc(cc1)c2[nH]c3ccc(cc3n2)C(=O)N,NC(=O)c1ccc2c(c1)nc([nH]2)c1ccccc1,[6.342],2.192,237.257,2000.0,168.0,fragment-like,2,42,picked,11,2,True
4,31653344,c1ccc(cc1)n2cnc3c2ccc(c3)N,Nc1ccc2c(c1)ncn2c1ccccc1,[6.348],2.333,209.247,50213.0,148.0,fragment-like,1,40,picked,12,1,True


In [4]:
# Attach vendor details to each fragment-like molecule by looking up its
# eMolecules ID in the search results, then order the list by vendor.

# One row per eMolecules ID. Keeping the first occurrence matches taking the
# first search hit when an ID is listed more than once.
frag_vendor_lookup = df_eMol.drop_duplicates(subset="EMOLECULES_ID").set_index(
    "EMOLECULES_ID"
)

# Stop with an explicit message if any selected molecule is absent from the
# search results, instead of failing on an empty lookup with a bare IndexError.
frag_ids = df_frag["eMolecules ID"]
frag_missing_ids = frag_ids[~frag_ids.isin(frag_vendor_lookup.index)]
if not frag_missing_ids.empty:
    raise ValueError(
        "No eMolecules search result for eMolecules ID(s): "
        + ", ".join(map(str, frag_missing_ids.unique()))
    )

# Vectorized lookup replaces the per-row scans of df_eMol. Values are converted
# with str() so both columns hold strings.
df_frag["Supplier Name"] = frag_ids.map(frag_vendor_lookup["SUPPLIER_NAME"]).map(str)
df_frag["Catalog Number"] = frag_ids.map(frag_vendor_lookup["CATALOG_NUMBER"]).map(str)

# Sort by supplier
df_frag = df_frag.sort_values("Supplier Name")
df_frag.head()

Unnamed: 0,eMolecules ID,canonical isomeric SMILES,eMolecules SMILES,"pKas in [3,11]",XlogP,MolWt,Availability (mg),Price,group,N_Rot,N_UV_chrom,Selection,Bin index,Priority,Final list,Supplier Name,Catalog Number
0,6679830,c1cc2c(cc1O)c3c(o2)C(=O)NCCC3,Oc1cc2c3CCCNC(=O)c3oc2cc1,[9.119],0.72,217.221,184.0,533.0,fragment-like,0,27,picked,4,1,True,ChemDiv,5816-0042
15,1327907,c1ccc2c(c1)c(ncn2)Nc3cccc(c3)C(F)(F)F,FC(c1cccc(c1)Nc1ncnc2c1cccc2)(F)F,[4.05],3.269,289.255,101.0,355.0,fragment-like,3,36,picked,17,2,True,ChemDiv,3232-0333
18,18908671,c1ccc(c(c1)NC(=O)c2ccc(o2)Cl)N3CCCCC3,Clc1ccc(o1)C(=O)Nc1ccccc1N1CCCCC1,[5.346],3.795,304.771,424.3,148.0,fragment-like,4,18,picked,19,1,True,Enamine Screening Compounds,Z119335440
17,30719859,c1ccc2c(c1)c(ncn2)NCc3ccc(cc3)Cl,Clc1ccc(cc1)CNc1ncnc2c1cccc2,[5.564],3.523,269.729,415.5,148.0,fragment-like,3,36,picked,18,2,True,Enamine Screening Compounds,Z126957826
16,1228629,c1ccc(cc1)Cc2nnc(s2)NC(=O)c3cccs3,O=C(c1cccs1)Nc1nnc(s1)Cc1ccccc1,[7.12],3.605,301.387,379.0,148.0,fragment-like,5,14,picked,18,1,True,Enamine Screening Compounds,Z27474679


In [5]:
# Save the supplier-annotated, vendor-sorted fragment-like list.
df_frag.to_csv(path_or_buf="df_frag_final_supplier.csv")

### Organize drug-like set

In [6]:
# Read the final drug-like selection; each row carries an "eMolecules ID"
# that is used below to look up vendor details.
df_drug = pd.read_csv(filepath_or_buffer="df_drug_final.csv")
df_drug.head(n=5)

Unnamed: 0,eMolecules ID,canonical isomeric SMILES,eMolecules SMILES,"pKas in [3,11]",XlogP,MolWt,Availability (mg),Price,group,N_Rot,N_UV_chrom,Selection,Bin index,Priority,Final list
0,536848,c1cc2c(cc(c(c2nc1)O)I)I,Ic1cc(I)c2c(c1O)nccc2,"[3.511, 6.794]",3.371,396.951,239.0,168.0,drug-like,0,29,picked,0.0,1,True
1,4375254,CCOC(=O)c1ccc(cc1)Nc2cc(nc(n2)Nc3ccc(cc3)C(=O)...,CCOC(=O)c1ccc(cc1)Nc1cc(C)nc(n1)Nc1ccc(cc1)C(=...,[6.336],2.937,420.461,319.0,168.0,drug-like,10,28,picked,0.0,3,True
2,18897105,c1ccc2c(c1)c(=O)[nH]c(n2)CCC(=O)Nc3ncc(s3)Cc4c...,O=C(Nc1ncc(s1)Cc1ccc(c(c1)F)F)CCc1nc2ccccc2c(=...,"[9.381, 10.773]",3.341,426.439,247.7,223.0,drug-like,7,37,picked,0.0,2,True
3,1574612,c1cc(cc(c1)Br)Nc2c(cnc(n2)Nc3cccc(c3)Br)F,Brc1cccc(c1)Nc1ncc(c(n1)Nc1cccc(c1)Br)F,[3.892],4.14,438.092,222.0,168.0,drug-like,4,28,picked,2.0,1,True
4,3365457,CCOc1ccc2c(c1)sc(n2)NC(=O)Cc3ccc(c(c3)Cl)Cl,CCOc1ccc2c(c1)sc(n2)NC(=O)Cc1ccc(c(c1)Cl)Cl,[9.167],5.171,381.276,489.9,148.0,drug-like,6,28,picked,3.0,1,True


In [7]:
# Attach vendor details to each drug-like molecule by looking up its
# eMolecules ID in the search results, then order the list by vendor.

# One row per eMolecules ID. Keeping the first occurrence matches taking the
# first search hit when an ID is listed more than once.
drug_vendor_lookup = df_eMol.drop_duplicates(subset="EMOLECULES_ID").set_index(
    "EMOLECULES_ID"
)

# Stop with an explicit message if any selected molecule is absent from the
# search results, instead of failing on an empty lookup with a bare IndexError.
drug_ids = df_drug["eMolecules ID"]
drug_missing_ids = drug_ids[~drug_ids.isin(drug_vendor_lookup.index)]
if not drug_missing_ids.empty:
    raise ValueError(
        "No eMolecules search result for eMolecules ID(s): "
        + ", ".join(map(str, drug_missing_ids.unique()))
    )

# Vectorized lookup replaces the per-row scans of df_eMol. Values are converted
# with str() so both columns hold strings.
df_drug["Supplier Name"] = drug_ids.map(drug_vendor_lookup["SUPPLIER_NAME"]).map(str)
df_drug["Catalog Number"] = drug_ids.map(drug_vendor_lookup["CATALOG_NUMBER"]).map(str)

# Sort by supplier
df_drug = df_drug.sort_values("Supplier Name")
df_drug.head()

Unnamed: 0,eMolecules ID,canonical isomeric SMILES,eMolecules SMILES,"pKas in [3,11]",XlogP,MolWt,Availability (mg),Price,group,N_Rot,N_UV_chrom,Selection,Bin index,Priority,Final list,Supplier Name,Catalog Number
2,18897105,c1ccc2c(c1)c(=O)[nH]c(n2)CCC(=O)Nc3ncc(s3)Cc4c...,O=C(Nc1ncc(s1)Cc1ccc(c(c1)F)F)CCc1nc2ccccc2c(=...,"[9.381, 10.773]",3.341,426.439,247.7,223.0,drug-like,7,37,picked,0.0,2,True,Enamine Screening Compounds,Z278071350
4,3365457,CCOc1ccc2c(c1)sc(n2)NC(=O)Cc3ccc(c(c3)Cl)Cl,CCOc1ccc2c(c1)sc(n2)NC(=O)Cc1ccc(c(c1)Cl)Cl,[9.167],5.171,381.276,489.9,148.0,drug-like,6,28,picked,3.0,1,True,Enamine Screening Compounds,Z30206127
7,10794751,CC(C)(C)c1cc(n(n1)c2ccccc2)NC(=O)Nc3cccc(c3Cl)Cl,O=C(Nc1cccc(c1Cl)Cl)Nc1cc(nn1c1ccccc1)C(C)(C)C,[4.113],5.784,403.305,324.5,148.0,drug-like,6,35,picked,4.0,1,True,Enamine Screening Compounds,Z2216889245
8,5428718,c1ccc(c(c1)C(=O)Nc2nnc(s2)SCc3ccc(cc3)Br)Cl,Brc1ccc(cc1)CSc1nnc(s1)NC(=O)c1ccccc1Cl,[6.525],5.9,440.765,239.5,400.0,drug-like,6,24,picked,4.0,2,True,Life Chemicals,F0417-1895
6,3064762,c1ccc(cc1)C(=O)Nc2ccc(cc2)Oc3c4c5c(sc4ncn3)CCCC5,O=C(c1ccccc1)Nc1ccc(cc1)Oc1ncnc2c1c1CCCCc1s2,[3.199],4.718,401.481,636.9,249.0,drug-like,5,34,picked,3.0,3,True,UORSY,PB31167343


In [8]:
# Save the supplier-annotated, vendor-sorted drug-like list.
df_drug.to_csv(path_or_buf="df_drug_final_supplier.csv")