In [1]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
import seaborn as sns

from chembl_webresource_client.new_client import new_client
from rdkit import Chem
from rdkit.Chem import Descriptors, Lipinski

# PART 1
## Reading from CHEMBL

In [2]:
# Target search for coronavirus
target = new_client.target
target_query = target.search('acetylcholinesterase')
targets = pd.DataFrame(target_query)
targets

Unnamed: 0,cross_references,organism,pref_name,score,species_group_flag,target_chembl_id,target_components,target_type,tax_id
0,"[{'xref_id': 'P22303', 'xref_name': None, 'xre...",Homo sapiens,Acetylcholinesterase,27.0,False,CHEMBL220,"[{'accession': 'P22303', 'component_descriptio...",SINGLE PROTEIN,9606
1,[],Homo sapiens,Cholinesterases; ACHE & BCHE,27.0,False,CHEMBL2095233,"[{'accession': 'P06276', 'component_descriptio...",SELECTIVITY GROUP,9606
2,[],Drosophila melanogaster,Acetylcholinesterase,17.0,False,CHEMBL2242744,"[{'accession': 'P07140', 'component_descriptio...",SINGLE PROTEIN,7227
3,"[{'xref_id': 'P04058', 'xref_name': None, 'xre...",Torpedo californica,Acetylcholinesterase,15.0,False,CHEMBL4780,"[{'accession': 'P04058', 'component_descriptio...",SINGLE PROTEIN,7787
4,"[{'xref_id': 'P21836', 'xref_name': None, 'xre...",Mus musculus,Acetylcholinesterase,15.0,False,CHEMBL3198,"[{'accession': 'P21836', 'component_descriptio...",SINGLE PROTEIN,10090
5,"[{'xref_id': 'P37136', 'xref_name': None, 'xre...",Rattus norvegicus,Acetylcholinesterase,15.0,False,CHEMBL3199,"[{'accession': 'P37136', 'component_descriptio...",SINGLE PROTEIN,10116
6,"[{'xref_id': 'O42275', 'xref_name': None, 'xre...",Electrophorus electricus,Acetylcholinesterase,15.0,False,CHEMBL4078,"[{'accession': 'O42275', 'component_descriptio...",SINGLE PROTEIN,8005
7,"[{'xref_id': 'P23795', 'xref_name': None, 'xre...",Bos taurus,Acetylcholinesterase,15.0,False,CHEMBL4768,"[{'accession': 'P23795', 'component_descriptio...",SINGLE PROTEIN,9913
8,[],Anopheles gambiae,Acetylcholinesterase,15.0,False,CHEMBL2046266,"[{'accession': 'Q869C3', 'component_descriptio...",SINGLE PROTEIN,7165
9,[],Bemisia tabaci,AChE2,15.0,False,CHEMBL2366409,"[{'accession': 'B3SST5', 'component_descriptio...",SINGLE PROTEIN,7038


In [3]:
selected_target = targets.target_chembl_id[0]
selected_target

'CHEMBL220'

In [4]:
activity = new_client.activity
res = activity.filter(target_chembl_id=selected_target).filter(standard_type="IC50")
df = pd.DataFrame(res)
df

Unnamed: 0,activity_comment,activity_id,activity_properties,assay_chembl_id,assay_description,assay_type,assay_variant_accession,assay_variant_mutation,bao_endpoint,bao_format,bao_label,canonical_smiles,data_validity_comment,data_validity_description,document_chembl_id,document_journal,document_year,ligand_efficiency,molecule_chembl_id,molecule_pref_name,parent_molecule_chembl_id,pchembl_value,potential_duplicate,qudt_units,record_id,relation,src_id,standard_flag,standard_relation,standard_text_value,standard_type,standard_units,standard_upper_value,standard_value,target_chembl_id,target_organism,target_pref_name,target_tax_id,text_value,toid,type,units,uo_units,upper_value,value
0,,33969,[],CHEMBL643384,Inhibitory concentration against acetylcholine...,B,,,BAO_0000190,BAO_0000357,single protein format,CCOc1nn(-c2cccc(OCc3ccccc3)c2)c(=O)o1,,,CHEMBL1148382,J. Med. Chem.,2004.0,"{'bei': '19.61', 'le': '0.36', 'lle': '3.32', ...",CHEMBL133897,,CHEMBL133897,6.12,False,http://www.openphacts.org/units/Nanomolar,252547,=,1,True,=,,IC50,nM,,750.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,0.75
1,,37563,[],CHEMBL643384,Inhibitory concentration against acetylcholine...,B,,,BAO_0000190,BAO_0000357,single protein format,O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC1CC1,,,CHEMBL1148382,J. Med. Chem.,2004.0,"{'bei': '18.57', 'le': '0.38', 'lle': '2.45', ...",CHEMBL336398,,CHEMBL336398,7.00,False,http://www.openphacts.org/units/Nanomolar,252533,=,1,True,=,,IC50,nM,,100.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,0.1
2,,37565,[],CHEMBL643384,Inhibitory concentration against acetylcholine...,B,,,BAO_0000190,BAO_0000357,single protein format,CN(C(=O)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F)c1ccccc1,,,CHEMBL1148382,J. Med. Chem.,2004.0,,CHEMBL131588,,CHEMBL131588,,False,http://www.openphacts.org/units/Nanomolar,252530,>,1,True,>,,IC50,nM,,50000.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,50.0
3,,38902,[],CHEMBL643384,Inhibitory concentration against acetylcholine...,B,,,BAO_0000190,BAO_0000357,single protein format,O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F,,,CHEMBL1148382,J. Med. Chem.,2004.0,"{'bei': '16.11', 'le': '0.34', 'lle': '1.81', ...",CHEMBL130628,,CHEMBL130628,6.52,False,http://www.openphacts.org/units/Nanomolar,252534,=,1,True,=,,IC50,nM,,300.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,0.3
4,,41170,[],CHEMBL643384,Inhibitory concentration against acetylcholine...,B,,,BAO_0000190,BAO_0000357,single protein format,CSc1nc(-c2ccc(OC(F)(F)F)cc2)nn1C(=O)N(C)C,,,CHEMBL1148382,J. Med. Chem.,2004.0,"{'bei': '17.60', 'le': '0.36', 'lle': '3.00', ...",CHEMBL130478,,CHEMBL130478,6.10,False,http://www.openphacts.org/units/Nanomolar,252552,=,1,True,=,,IC50,nM,,800.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,0.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7544,,20703835,[],CHEMBL4627889,Inhibition of AChE (unknown origin) using acet...,B,,,BAO_0000190,BAO_0000357,single protein format,COc1ccc(CCC(=O)Nc2nc(-c3cc4ccccc4oc3=O)cs2)cc1OC,,,CHEMBL4627271,Bioorg Med Chem Lett,2020.0,"{'bei': '14.05', 'le': '0.27', 'lle': '1.62', ...",CHEMBL4645659,,CHEMBL4645659,6.13,False,http://www.openphacts.org/units/Nanomolar,3486808,=,1,True,=,,IC50,nM,,740.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,0.74
7545,,20703856,[],CHEMBL4627888,Inhibition of AChE (unknown origin),B,,,BAO_0000190,BAO_0000357,single protein format,COc1ccc(-c2csc(NC(=O)CCN3CCCC3)n2)cc1,,,CHEMBL4627271,Bioorg Med Chem Lett,2020.0,"{'bei': '18.99', 'le': '0.37', 'lle': '3.05', ...",CHEMBL513063,,CHEMBL513063,6.29,False,http://www.openphacts.org/units/Nanomolar,3486809,=,1,True,=,,IC50,nM,,510.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,0.51
7546,,20708928,[],CHEMBL4628756,Inhibition of human AchE,A,,,BAO_0000190,BAO_0000357,single protein format,COc1cc(C2C3=C(CCCC3=O)NC3=C2C(=O)CCC3)ccc1OCc1...,Outside typical range,Values for this activity type are unusually la...,CHEMBL4627331,Bioorg Med Chem Lett,2020.0,,CHEMBL4640608,,CHEMBL4640608,,False,http://www.openphacts.org/units/Nanomolar,3487873,=,1,True,=,,IC50,nM,,125000.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,125.0
7547,,20708929,[],CHEMBL4628756,Inhibition of human AchE,A,,,BAO_0000190,BAO_0000357,single protein format,O=C1CCCC2=C1C(c1ccc(OCc3cccc(F)c3)c(Br)c1)C1=C...,,,CHEMBL4627331,Bioorg Med Chem Lett,2020.0,,CHEMBL4173961,,CHEMBL4173961,,False,http://www.openphacts.org/units/Nanomolar,3487876,>,1,True,>,,IC50,nM,,100000.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,100.0


# Missing data

In [5]:
df2 = df[df.standard_value.notna()]
df2 = df2[df2.canonical_smiles.notna()]
df2

Unnamed: 0,activity_comment,activity_id,activity_properties,assay_chembl_id,assay_description,assay_type,assay_variant_accession,assay_variant_mutation,bao_endpoint,bao_format,bao_label,canonical_smiles,data_validity_comment,data_validity_description,document_chembl_id,document_journal,document_year,ligand_efficiency,molecule_chembl_id,molecule_pref_name,parent_molecule_chembl_id,pchembl_value,potential_duplicate,qudt_units,record_id,relation,src_id,standard_flag,standard_relation,standard_text_value,standard_type,standard_units,standard_upper_value,standard_value,target_chembl_id,target_organism,target_pref_name,target_tax_id,text_value,toid,type,units,uo_units,upper_value,value
0,,33969,[],CHEMBL643384,Inhibitory concentration against acetylcholine...,B,,,BAO_0000190,BAO_0000357,single protein format,CCOc1nn(-c2cccc(OCc3ccccc3)c2)c(=O)o1,,,CHEMBL1148382,J. Med. Chem.,2004.0,"{'bei': '19.61', 'le': '0.36', 'lle': '3.32', ...",CHEMBL133897,,CHEMBL133897,6.12,False,http://www.openphacts.org/units/Nanomolar,252547,=,1,True,=,,IC50,nM,,750.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,0.75
1,,37563,[],CHEMBL643384,Inhibitory concentration against acetylcholine...,B,,,BAO_0000190,BAO_0000357,single protein format,O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC1CC1,,,CHEMBL1148382,J. Med. Chem.,2004.0,"{'bei': '18.57', 'le': '0.38', 'lle': '2.45', ...",CHEMBL336398,,CHEMBL336398,7.00,False,http://www.openphacts.org/units/Nanomolar,252533,=,1,True,=,,IC50,nM,,100.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,0.1
2,,37565,[],CHEMBL643384,Inhibitory concentration against acetylcholine...,B,,,BAO_0000190,BAO_0000357,single protein format,CN(C(=O)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F)c1ccccc1,,,CHEMBL1148382,J. Med. Chem.,2004.0,,CHEMBL131588,,CHEMBL131588,,False,http://www.openphacts.org/units/Nanomolar,252530,>,1,True,>,,IC50,nM,,50000.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,50.0
3,,38902,[],CHEMBL643384,Inhibitory concentration against acetylcholine...,B,,,BAO_0000190,BAO_0000357,single protein format,O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F,,,CHEMBL1148382,J. Med. Chem.,2004.0,"{'bei': '16.11', 'le': '0.34', 'lle': '1.81', ...",CHEMBL130628,,CHEMBL130628,6.52,False,http://www.openphacts.org/units/Nanomolar,252534,=,1,True,=,,IC50,nM,,300.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,0.3
4,,41170,[],CHEMBL643384,Inhibitory concentration against acetylcholine...,B,,,BAO_0000190,BAO_0000357,single protein format,CSc1nc(-c2ccc(OC(F)(F)F)cc2)nn1C(=O)N(C)C,,,CHEMBL1148382,J. Med. Chem.,2004.0,"{'bei': '17.60', 'le': '0.36', 'lle': '3.00', ...",CHEMBL130478,,CHEMBL130478,6.10,False,http://www.openphacts.org/units/Nanomolar,252552,=,1,True,=,,IC50,nM,,800.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,0.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7544,,20703835,[],CHEMBL4627889,Inhibition of AChE (unknown origin) using acet...,B,,,BAO_0000190,BAO_0000357,single protein format,COc1ccc(CCC(=O)Nc2nc(-c3cc4ccccc4oc3=O)cs2)cc1OC,,,CHEMBL4627271,Bioorg Med Chem Lett,2020.0,"{'bei': '14.05', 'le': '0.27', 'lle': '1.62', ...",CHEMBL4645659,,CHEMBL4645659,6.13,False,http://www.openphacts.org/units/Nanomolar,3486808,=,1,True,=,,IC50,nM,,740.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,0.74
7545,,20703856,[],CHEMBL4627888,Inhibition of AChE (unknown origin),B,,,BAO_0000190,BAO_0000357,single protein format,COc1ccc(-c2csc(NC(=O)CCN3CCCC3)n2)cc1,,,CHEMBL4627271,Bioorg Med Chem Lett,2020.0,"{'bei': '18.99', 'le': '0.37', 'lle': '3.05', ...",CHEMBL513063,,CHEMBL513063,6.29,False,http://www.openphacts.org/units/Nanomolar,3486809,=,1,True,=,,IC50,nM,,510.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,0.51
7546,,20708928,[],CHEMBL4628756,Inhibition of human AchE,A,,,BAO_0000190,BAO_0000357,single protein format,COc1cc(C2C3=C(CCCC3=O)NC3=C2C(=O)CCC3)ccc1OCc1...,Outside typical range,Values for this activity type are unusually la...,CHEMBL4627331,Bioorg Med Chem Lett,2020.0,,CHEMBL4640608,,CHEMBL4640608,,False,http://www.openphacts.org/units/Nanomolar,3487873,=,1,True,=,,IC50,nM,,125000.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,125.0
7547,,20708929,[],CHEMBL4628756,Inhibition of human AchE,A,,,BAO_0000190,BAO_0000357,single protein format,O=C1CCCC2=C1C(c1ccc(OCc3cccc(F)c3)c(Br)c1)C1=C...,,,CHEMBL4627331,Bioorg Med Chem Lett,2020.0,,CHEMBL4173961,,CHEMBL4173961,,False,http://www.openphacts.org/units/Nanomolar,3487876,>,1,True,>,,IC50,nM,,100000.0,CHEMBL220,Homo sapiens,Acetylcholinesterase,9606,,,IC50,uM,UO_0000065,,100.0


In [53]:
df2.canonical_smiles.nunique()

5103

In [54]:
df2_nr = df2.drop_duplicates(['canonical_smiles'])

## Preprocessing

In [55]:
selection = ['molecule_chembl_id','canonical_smiles','standard_value']
df3 = df2_nr[selection]

In [56]:
# label as active or inactive
bioactivity_threshold = []
for i in df3.standard_value:
  if float(i) >= 10000:
    bioactivity_threshold.append("inactive")
  elif float(i) <= 1000:
    bioactivity_threshold.append("active")
  else:
    bioactivity_threshold.append("intermediate")
bioactivity_class = pd.Series(bioactivity_threshold, name='class')
df4 = pd.concat([df3, bioactivity_class], axis=1)
df4

Unnamed: 0,molecule_chembl_id,canonical_smiles,standard_value,class
0,CHEMBL133897,CCOc1nn(-c2cccc(OCc3ccccc3)c2)c(=O)o1,750.0,active
1,CHEMBL336398,O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC1CC1,100.0,active
2,CHEMBL131588,CN(C(=O)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F)c1ccccc1,50000.0,inactive
3,CHEMBL130628,O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F,300.0,active
4,CHEMBL130478,CSc1nc(-c2ccc(OC(F)(F)F)cc2)nn1C(=O)N(C)C,800.0,active
...,...,...,...,...
7543,CHEMBL4645476,CN(C)C(=O)Oc1ccc(C(O)CNC2CCCCC2)cc1.Cl,266000.0,
7544,CHEMBL4645659,COc1ccc(CCC(=O)Nc2nc(-c3cc4ccccc4oc3=O)cs2)cc1OC,740.0,
7545,CHEMBL513063,COc1ccc(-c2csc(NC(=O)CCN3CCCC3)n2)cc1,510.0,
7546,CHEMBL4640608,COc1cc(C2C3=C(CCCC3=O)NC3=C2C(=O)CCC3)ccc1OCc1...,125000.0,


# PART 2 - Exploratory Data Analysis

In [57]:
df_no_smiles = df4.drop(columns='canonical_smiles')

In [58]:
smiles = []

for i in df4.canonical_smiles.tolist():
  cpd = str(i).split('.')
  cpd_longest = max(cpd, key = len)
  smiles.append(cpd_longest)

smiles = pd.Series(smiles, name = 'canonical_smiles')
df_clean_smiles = pd.concat([df_no_smiles, smiles], axis=1)
df_clean_smiles

Unnamed: 0,molecule_chembl_id,standard_value,class,canonical_smiles
0,CHEMBL133897,750.0,active,CCOc1nn(-c2cccc(OCc3ccccc3)c2)c(=O)o1
1,CHEMBL336398,100.0,active,O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC1CC1
2,CHEMBL131588,50000.0,inactive,CN(C(=O)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F)c1ccccc1
3,CHEMBL130628,300.0,active,O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F
4,CHEMBL130478,800.0,active,CSc1nc(-c2ccc(OC(F)(F)F)cc2)nn1C(=O)N(C)C
...,...,...,...,...
7543,CHEMBL4645476,266000.0,,
7544,CHEMBL4645659,740.0,,
7545,CHEMBL513063,510.0,,
7546,CHEMBL4640608,125000.0,,


## Calculate Lipinski
The Lipinski's Rule stated the following:
* Molecular weight < 500 Dalton
* Octanol-water partition coefficient (LogP) < 5
* Hydrogen bond donors < 5
* Hydrogen bond acceptors < 10 

In [60]:
def lipinski(smiles):

    moldata= []
    for elem in smiles:
        if elem == 'nan':
            continue
        print(elem)
        mol=Chem.MolFromSmiles(elem) 
        moldata.append(mol)
       
    baseData= np.arange(1,1)
    i=0  
    for mol in moldata:        
       
        desc_MolWt = Descriptors.MolWt(mol)
        desc_MolLogP = Descriptors.MolLogP(mol)
        desc_NumHDonors = Lipinski.NumHDonors(mol)
        desc_NumHAcceptors = Lipinski.NumHAcceptors(mol)
           
        row = np.array([desc_MolWt,
                        desc_MolLogP,
                        desc_NumHDonors,
                        desc_NumHAcceptors])   
    
        if(i==0):
            baseData=row
        else:
            baseData=np.vstack([baseData, row])
        i=i+1      
    
    columnNames=["MW","LogP","NumHDonors","NumHAcceptors"]   
    descriptors = pd.DataFrame(data=baseData,columns=columnNames)
    
    return descriptors

In [61]:
df_clean_smiles.canonical_smiles.values

array(['CCOc1nn(-c2cccc(OCc3ccccc3)c2)c(=O)o1',
       'O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC1CC1',
       'CN(C(=O)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F)c1ccccc1', ..., nan, nan,
       nan], dtype=object)

In [62]:
df_lipinski = lipinski(df_clean_smiles.canonical_smiles)

CCOc1nn(-c2cccc(OCc3ccccc3)c2)c(=O)o1
O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC1CC1
CN(C(=O)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F)c1ccccc1
O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F
CSc1nc(-c2ccc(OC(F)(F)F)cc2)nn1C(=O)N(C)C
CSc1nc(-c2ccc(C)cc2)nn1C(=O)N(C)c1ccccc1
CSc1nc(-c2ccc(Cl)cc2)nn1C(=O)N(C)C
CCCCCCSc1nc(-c2ccc(Cl)cc2)nn1C(=O)N1CCOCC1
COc1ccc(-c2nc(SC)n(C(=O)N(C)C)n2)cc1
CSc1nc(-c2ccc(OC(F)(F)F)cc2)nn1C(=O)N(C)c1ccccc1
CCSc1nc(-c2ccc(OC)cc2)nn1C(=O)N1CCOCC1
CSc1nc(-c2ccc3ccccc3c2)nn1C(=O)N(C)C
C[C@H]1C(=O)N(C(=O)NCc2ccccc2)[C@@H]1Oc1ccc(C(=O)C(C)(C)C)cc1
CSc1nc(-c2ccc(-c3ccccc3)cc2)nn1C(=O)N(C)C
CSc1nc(/C=C/c2ccccc2)nn1C(=O)N(C)C
CCCCCCSc1nc(-c2ccc(Cl)cc2)nn1C(=O)N1CCCCC1
CSc1nc(-c2ccc(Cl)cc2)nn1C(=O)N(C)c1ccccc1
Cc1c(C(C)C)c(=O)on1C(=O)N1CCC[C@H](C)C1
CCSc1nc(-c2ccc(OC)cc2)nn1C(=O)N(C)c1ccccc1
CCCCCCSc1nc(-c2ccc(C)cc2)nn1C(=O)N(C)c1ccccc1
CSc1nc(-c2ccc(Cl)cc2)nn1C(=O)N1CCCCC1
O=C(N1CCOCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F
CSc1nc(-c2ccc(C)cc2)nn1C(=O)N(C)C
CSc1nc(-c2ccc(Cl)cc2)nn1C(=O)

TypeError: No registered converter was able to produce a C++ rvalue of type class std::basic_string<wchar_t,struct std::char_traits<wchar_t>,class std::allocator<wchar_t> > from this Python object of type float

In [None]:
A = Chem.MolFromSmiles('CCOc1nn(-c2cccc(OCc3ccccc3)c2)c(=O)o1')

In [None]:
df_clean_smiles.canonical_smiles

0                   CCOc1nn(-c2cccc(OCc3ccccc3)c2)c(=O)o1
1              O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC1CC1
2       CN(C(=O)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F)c1ccccc1
3           O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F
4               CSc1nc(-c2ccc(OC(F)(F)F)cc2)nn1C(=O)N(C)C
                              ...                        
7543                                                  NaN
7544                                                  NaN
7545                                                  NaN
7546                                                  NaN
7547                                                  NaN
Name: canonical_smiles, Length: 7431, dtype: object

In [None]:
for ele in df_clean_smiles.canonical_smiles:
    print(ele)

CCOc1nn(-c2cccc(OCc3ccccc3)c2)c(=O)o1
O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC1CC1
CN(C(=O)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F)c1ccccc1
O=C(N1CCCCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F
CSc1nc(-c2ccc(OC(F)(F)F)cc2)nn1C(=O)N(C)C
CSc1nc(-c2ccc(C)cc2)nn1C(=O)N(C)c1ccccc1
CSc1nc(-c2ccc(Cl)cc2)nn1C(=O)N(C)C
CCCCCCSc1nc(-c2ccc(Cl)cc2)nn1C(=O)N1CCOCC1
COc1ccc(-c2nc(SC)n(C(=O)N(C)C)n2)cc1
CSc1nc(-c2ccc(OC(F)(F)F)cc2)nn1C(=O)N(C)c1ccccc1
CCSc1nc(-c2ccc(OC)cc2)nn1C(=O)N1CCOCC1
CSc1nc(-c2ccc3ccccc3c2)nn1C(=O)N(C)C
C[C@H]1C(=O)N(C(=O)NCc2ccccc2)[C@@H]1Oc1ccc(C(=O)C(C)(C)C)cc1
CSc1nc(-c2ccc(-c3ccccc3)cc2)nn1C(=O)N(C)C
CSc1nc(/C=C/c2ccccc2)nn1C(=O)N(C)C
CCCCCCSc1nc(-c2ccc(Cl)cc2)nn1C(=O)N1CCCCC1
CSc1nc(-c2ccc(Cl)cc2)nn1C(=O)N(C)c1ccccc1
Cc1c(C(C)C)c(=O)on1C(=O)N1CCC[C@H](C)C1
CCSc1nc(-c2ccc(OC)cc2)nn1C(=O)N(C)c1ccccc1
CCCCCCSc1nc(-c2ccc(C)cc2)nn1C(=O)N(C)c1ccccc1
CSc1nc(-c2ccc(Cl)cc2)nn1C(=O)N1CCCCC1
O=C(N1CCOCC1)n1nc(-c2ccc(Cl)cc2)nc1SCC(F)(F)F
CSc1nc(-c2ccc(C)cc2)nn1C(=O)N(C)C
CSc1nc(-c2ccc(Cl)cc2)nn1C(=O)