## Import necessary libraries

In [12]:
from chembl_webresource_client.new_client import new_client
import pandas as pd

## Find target proteins

In [36]:
# Four Plasmodium species cause malaria problems in humans, so search ChEMBL
# on the genus name to pick up targets from all of them.
target_query = new_client.target.search('Plasmodium')
plasmodium_targets = pd.DataFrame.from_dict(target_query)

# Keep only single-protein targets; complexes and whole-organism targets are
# out of scope for this analysis.
# (To see every column while browsing: pd.set_option('display.max_columns', 100))
is_single_protein = plasmodium_targets['target_type'].eq("SINGLE PROTEIN")
single_protein_targets = plasmodium_targets.loc[is_single_protein]
single_protein_targets

Unnamed: 0,cross_references,organism,pref_name,score,species_group_flag,target_chembl_id,target_components,target_type,tax_id
0,[],Homo sapiens,Duffy antigen/chemokine receptor,15.0,False,CHEMBL2321626,"[{'accession': 'Q16570', 'component_descriptio...",SINGLE PROTEIN,9606
21,"[{'xref_id': 'P13922', 'xref_name': None, 'xre...",Plasmodium falciparum K1,Dihydrofolate reductase,7.0,False,CHEMBL1939,"[{'accession': 'P13922', 'component_descriptio...",SINGLE PROTEIN,5839
22,"[{'xref_id': 'Q02768', 'xref_name': None, 'xre...",Plasmodium falciparum,Cytochrome b,7.0,False,CHEMBL1777,"[{'accession': 'Q02768', 'component_descriptio...",SINGLE PROTEIN,5833
23,"[{'xref_id': 'P05227', 'xref_name': None, 'xre...",Plasmodium falciparum,Histidine-rich protein,7.0,False,CHEMBL1923,"[{'accession': 'P05227', 'component_descriptio...",SINGLE PROTEIN,5833
24,"[{'xref_id': 'Q25704', 'xref_name': None, 'xre...",Plasmodium falciparum,Dihydropteroate synthetase,7.0,False,CHEMBL2013,"[{'accession': 'Q25704', 'component_descriptio...",SINGLE PROTEIN,5833
...,...,...,...,...,...,...,...,...,...
130,[],Plasmodium falciparum (isolate 3D7),Plasmepsin X,7.0,False,CHEMBL4523390,"[{'accession': 'Q8IAS0', 'component_descriptio...",SINGLE PROTEIN,36329
131,[],Plasmodium falciparum (isolate 3D7),Casein kinase I,7.0,False,CHEMBL4523391,"[{'accession': 'Q8IHZ9', 'component_descriptio...",SINGLE PROTEIN,36329
132,[],Plasmodium falciparum,Glutamine amidotransferase,7.0,False,CHEMBL4523484,"[{'accession': 'Q8IJR9', 'component_descriptio...",SINGLE PROTEIN,5833
133,[],Plasmodium falciparum,P-type ATPase,7.0,False,CHEMBL4630875,"[{'accession': 'Q27724', 'component_descriptio...",SINGLE PROTEIN,5833


Next, I looked through the results to find the specific protein to use for the machine learning.

I chose Dihydrofolate reductase (DHFR) for a variety of reasons, but the main ones include:
- it has been targeted before in malaria drug treatment
- it is crucial to malaria spreading
- it is in all 4 parasites

In [40]:
# Pick the DHFR target by its name and organism rather than by a hard-coded
# row label: the row labels come from the order of the ChEMBL search results,
# so a fixed label could silently point at a different protein if that order
# ever changes.
TARGET_NAME = "Dihydrofolate reductase"
TARGET_ORGANISM = "Plasmodium falciparum K1"

dhfr_matches = single_protein_targets.loc[
    (single_protein_targets["pref_name"] == TARGET_NAME)
    & (single_protein_targets["organism"] == TARGET_ORGANISM),
    "target_chembl_id",
]
# Fail loudly instead of continuing with a missing or ambiguous target.
assert len(dhfr_matches) == 1, (
    f"expected exactly one {TARGET_NAME} target for {TARGET_ORGANISM}, "
    f"found {len(dhfr_matches)}"
)

target_protein = dhfr_matches.iloc[0]
target_protein

'CHEMBL1939'

## Get activity data 

In [43]:
# Fetch every IC50 bioactivity record reported against the chosen target.
activity = new_client.activity
activity_data = activity.filter(
    target_chembl_id=target_protein,
    standard_type="IC50",
)

# NOTE(review): the name `display` shadows IPython's built-in display() helper
# for the rest of the session. It is kept here because later cells may refer
# to this DataFrame by that name; consider renaming it notebook-wide.
display = pd.DataFrame.from_dict(activity_data)

display

Unnamed: 0,action_type,activity_comment,activity_id,activity_properties,assay_chembl_id,assay_description,assay_type,assay_variant_accession,assay_variant_mutation,bao_endpoint,bao_format,bao_label,canonical_smiles,data_validity_comment,data_validity_description,document_chembl_id,document_journal,document_year,ligand_efficiency,molecule_chembl_id,molecule_pref_name,parent_molecule_chembl_id,pchembl_value,potential_duplicate,qudt_units,record_id,relation,src_id,standard_flag,standard_relation,standard_text_value,standard_type,standard_units,standard_upper_value,standard_value,target_chembl_id,target_organism,target_pref_name,target_tax_id,text_value,toid,type,units,uo_units,upper_value,value
0,,,188717,[],CHEMBL769665,In vitro antimalarial activity against Plasmod...,F,,,BAO_0000190,BAO_0000019,assay format,CCCCOc1cc(Cc2cnc(N)nc2N)ccc1OCc1cc(OC)c(OC)c(O...,,,CHEMBL1148379,J Med Chem,2004,,CHEMBL416373,,CHEMBL416373,6.48,0,http://www.openphacts.org/units/Nanomolar,224312,=,1,1,=,,IC50,nM,,330.0,CHEMBL1939,Plasmodium falciparum K1,Dihydrofolate reductase,5839,,,IC50,uM,UO_0000065,,0.33
1,,,188718,[],CHEMBL769666,In vitro antimalarial activity relative to tri...,F,,,BAO_0000190,BAO_0000019,assay format,CCCCOc1cc(Cc2cnc(N)nc2N)ccc1OCc1cc(OC)c(OC)c(O...,,,CHEMBL1148379,J Med Chem,2004,,CHEMBL416373,,CHEMBL416373,7.30,0,http://www.openphacts.org/units/Nanomolar,224312,=,1,1,=,,IC50,nM,,50.0,CHEMBL1939,Plasmodium falciparum K1,Dihydrofolate reductase,5839,,,IC50,uM,UO_0000065,,0.05
2,,,188719,[],CHEMBL769492,In vitro antimalarial activity against Plasmod...,F,,,BAO_0000190,BAO_0000019,assay format,CCCCOc1cc(Cc2cnc(N)nc2N)ccc1OCc1cc(OC)c(OC)c(O...,,,CHEMBL1148379,J Med Chem,2004,,CHEMBL416373,,CHEMBL416373,4.99,0,http://www.openphacts.org/units/Nanomolar,224312,=,1,1,=,,IC50,nM,,10160.0,CHEMBL1939,Plasmodium falciparum K1,Dihydrofolate reductase,5839,,,IC50,uM,UO_0000065,,10.16
3,,,188720,[],CHEMBL769664,In vitro antimalarial activity against Plasmod...,F,,,BAO_0000190,BAO_0000019,assay format,CCCCOc1cc(Cc2cnc(N)nc2N)ccc1OCc1cc(OC)c(OC)c(O...,,,CHEMBL1148379,J Med Chem,2004,,CHEMBL416373,,CHEMBL416373,7.16,0,http://www.openphacts.org/units/Nanomolar,224312,=,1,1,=,,IC50,nM,,70.0,CHEMBL1939,Plasmodium falciparum K1,Dihydrofolate reductase,5839,,,IC50,uM,UO_0000065,,0.07
4,,,188721,[],CHEMBL769484,In vitro antimalarial activity against Plasmod...,F,,,BAO_0000190,BAO_0000019,assay format,CCCCOc1cc(Cc2cnc(N)nc2N)ccc1OCc1cc(OC)c(OC)c(O...,,,CHEMBL1148379,J Med Chem,2004,,CHEMBL416373,,CHEMBL416373,5.47,0,http://www.openphacts.org/units/Nanomolar,224312,=,1,1,=,,IC50,nM,,3400.0,CHEMBL1939,Plasmodium falciparum K1,Dihydrofolate reductase,5839,,,IC50,uM,UO_0000065,,3.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
368,,,18465879,[],CHEMBL4181680,Inhibition of Plasmodium falciparum DHFR using...,B,,,BAO_0000190,BAO_0000357,single protein format,Nc1nc(N)c2cc(NCc3ccc(CCNC(=O)c4ccc5ccccc5c4)cc...,,,CHEMBL4177676,Bioorg Med Chem,2017,"{'bei': '15.50', 'le': '0.28', 'lle': '2.64', ...",CHEMBL4213293,,CHEMBL4213293,7.17,0,http://www.openphacts.org/units/Nanomolar,3064704,=,1,1,=,,IC50,nM,,67.9,CHEMBL1939,Plasmodium falciparum K1,Dihydrofolate reductase,5839,,,IC50,nM,UO_0000065,,67.9
369,,,18465880,[],CHEMBL4181680,Inhibition of Plasmodium falciparum DHFR using...,B,,,BAO_0000190,BAO_0000357,single protein format,Nc1nc(N)c2cc(NCc3ccc(CCNC(=O)Cc4ccccc4)cc3)ccc2n1,,,CHEMBL4177676,Bioorg Med Chem,2017,"{'bei': '16.77', 'le': '0.31', 'lle': '3.84', ...",CHEMBL4203015,,CHEMBL4203015,7.15,0,http://www.openphacts.org/units/Nanomolar,3064705,=,1,1,=,,IC50,nM,,70.1,CHEMBL1939,Plasmodium falciparum K1,Dihydrofolate reductase,5839,,,IC50,nM,UO_0000065,,70.1
370,,,18465881,[],CHEMBL4181680,Inhibition of Plasmodium falciparum DHFR using...,B,,,BAO_0000190,BAO_0000357,single protein format,Nc1nc(N)c2cc(NCc3ccc(CCNC(=O)CCc4ccccc4)cc3)cc...,,,CHEMBL4177676,Bioorg Med Chem,2017,"{'bei': '15.28', 'le': '0.28', 'lle': '3.03', ...",CHEMBL4211317,,CHEMBL4211317,6.73,0,http://www.openphacts.org/units/Nanomolar,3064706,=,1,1,=,,IC50,nM,,185.4,CHEMBL1939,Plasmodium falciparum K1,Dihydrofolate reductase,5839,,,IC50,nM,UO_0000065,,185.4
371,,,18465882,[],CHEMBL4181680,Inhibition of Plasmodium falciparum DHFR using...,B,,,BAO_0000190,BAO_0000357,single protein format,Nc1nc(N)c2cc(NCc3ccc(CCNC(=O)C4CCCCC4)cc3)ccc2n1,,,CHEMBL4177676,Bioorg Med Chem,2017,"{'bei': '15.88', 'le': '0.29', 'lle': '3.00', ...",CHEMBL4212687,,CHEMBL4212687,6.65,0,http://www.openphacts.org/units/Nanomolar,3064707,=,1,1,=,,IC50,nM,,225.5,CHEMBL1939,Plasmodium falciparum K1,Dihydrofolate reductase,5839,,,IC50,nM,UO_0000065,,225.5


## Get descriptors/fingerprint data for them