## Installing chembl_webresource_client, used to extract bioactivity values for specific targets

In [None]:
pip install chembl_webresource_client


Note: you may need to restart the kernel to use updated packages.


## Importing the required Libraries

In [None]:
import pandas as pd
from chembl_webresource_client.new_client import new_client
import pandas as pd
import numpy as np




# Target search for Coronavirus


In [None]:
# Search the ChEMBL target endpoint for "coronavirus" and tabulate the hits
# so that a target of interest can be chosen from the displayed table.
target = new_client.target
target_query = target.search('coronavirus')
targets = pd.DataFrame.from_dict(target_query, orient='columns')
targets

Unnamed: 0,cross_references,organism,pref_name,score,species_group_flag,target_chembl_id,target_components,target_type,tax_id
0,[],Coronavirus,Coronavirus,17.0,False,CHEMBL613732,[],ORGANISM,11119
1,[],SARS coronavirus,SARS coronavirus,14.0,False,CHEMBL612575,[],ORGANISM,227859
2,[],Feline coronavirus,Feline coronavirus,14.0,False,CHEMBL612744,[],ORGANISM,12663
3,[],Human coronavirus 229E,Human coronavirus 229E,13.0,False,CHEMBL613837,[],ORGANISM,11137
4,"[{'xref_id': 'P0C6U8', 'xref_name': None, 'xre...",SARS coronavirus,SARS coronavirus 3C-like proteinase,10.0,False,CHEMBL3927,"[{'accession': 'P0C6U8', 'component_descriptio...",SINGLE PROTEIN,227859
5,[],Middle East respiratory syndrome-related coron...,Middle East respiratory syndrome-related coron...,9.0,False,CHEMBL4296578,[],ORGANISM,1335626
6,"[{'xref_id': 'P0C6X7', 'xref_name': None, 'xre...",SARS coronavirus,Replicase polyprotein 1ab,5.0,False,CHEMBL5118,"[{'accession': 'P0C6X7', 'component_descriptio...",SINGLE PROTEIN,227859


# Selecting and retrieving bioactivity data for Replicase polyprotein 1ab

In [None]:
# Pick the target by its preferred name instead of a hard-coded row position:
# the row order of the search results depends on the ranking returned by the
# service, so a fixed index can silently select a different target.
target_name = 'Replicase polyprotein 1ab'
matching_ids = targets.loc[targets['pref_name'] == target_name, 'target_chembl_id']
if matching_ids.empty:
    raise LookupError(f"No target named {target_name!r} in the search results")
# If several rows share the name, keep the first one listed.
selected_target = matching_ids.iloc[0]
selected_target

'CHEMBL5118'

### Only bioactivity data reported as IC50 values (in nM units) will be retrieved for the replicase polyprotein 1ab target selected above.

In [None]:
# Build the activity query: restrict to the selected target, then keep only
# records whose standard type is IC50.
activity = new_client.activity

res = (
    activity
    .filter(target_chembl_id=selected_target)
    .filter(standard_type="IC50")
)

In [None]:

# Load the filtered activity records into a DataFrame.
df6 = pd.DataFrame.from_dict(res, orient='columns')


In [None]:
# Preview the first four activity records.
df6.head(n=4)

Unnamed: 0,activity_comment,activity_id,activity_properties,assay_chembl_id,assay_description,assay_type,bao_endpoint,bao_format,bao_label,canonical_smiles,...,target_organism,target_pref_name,target_tax_id,text_value,toid,type,units,uo_units,upper_value,value
0,,1988091,[],CHEMBL898907,Inhibition of SARS-CoV 3C-like protease by FRE...,B,BAO_0000190,BAO_0000019,assay format,CCOC(=O)/C=C/[C@H](C[C@@H]1CCNC1=O)NC(=O)[C@@H...,...,SARS coronavirus,Replicase polyprotein 1ab,227859,,,IC50,uM,UO_0000065,,870.0
1,,1988092,[],CHEMBL898907,Inhibition of SARS-CoV 3C-like protease by FRE...,B,BAO_0000190,BAO_0000019,assay format,CCOC(=O)/C=C/[C@H](C[C@@H]1CCNC1=O)NC(=O)[C@H]...,...,SARS coronavirus,Replicase polyprotein 1ab,227859,,,IC50,uM,UO_0000065,,200.0
2,,1988093,[],CHEMBL898907,Inhibition of SARS-CoV 3C-like protease by FRE...,B,BAO_0000190,BAO_0000019,assay format,CCOC(=O)/C=C/[C@H](C[C@@H]1CCNC1=O)NC(=O)[C@H]...,...,SARS coronavirus,Replicase polyprotein 1ab,227859,,,IC50,uM,UO_0000065,,300.0
3,,1988094,[],CHEMBL898907,Inhibition of SARS-CoV 3C-like protease by FRE...,B,BAO_0000190,BAO_0000019,assay format,CCOC(=O)/C=C/[C@H](C[C@@H]1CCNC1=O)NC(=O)[C@H]...,...,SARS coronavirus,Replicase polyprotein 1ab,227859,,,IC50,uM,UO_0000065,,15.0


## Conversion to dataframe

In [None]:
# Build the working frame `df` from the raw activity table, renumbering the
# rows with a fresh 0..n-1 index.
df = pd.concat(objs=[df6], ignore_index=True)

## Creating the CSV files from the extracted bioactivity values

In [None]:
# Snapshot the full, unfiltered activity table (row index included) to disk.
df.to_csv(path_or_buf='replicase1_temp.csv')

In [None]:
# Columns removed from the working frame.
columns_to_drop = [
    'activity_comment',
    'activity_id',
    'activity_properties',
    'assay_chembl_id',
    'assay_description',
    'assay_type',
    'bao_endpoint',
    'bao_format',
    'bao_label',
    'data_validity_comment',
    'data_validity_description',
    'document_chembl_id',
    'document_journal',
    'document_year',
    'ligand_efficiency',
    'potential_duplicate',
    'qudt_units',
    'record_id',
    'parent_molecule_chembl_id',
    'relation',
    'src_id',
    'standard_flag',
    'standard_relation',
    'standard_text_value',
    'pchembl_value',
    'standard_upper_value',
    'target_tax_id',
    'toid',
    'uo_units',
    'upper_value',
    'text_value',
]

# errors='ignore' skips names that are already absent, so re-running this cell
# (or receiving a response that lacks one of these fields) no longer raises
# KeyError. Note that a misspelled name in the list is skipped silently too.
df = df.drop(columns=columns_to_drop, errors='ignore')

We will save the resulting data to the CSV file `replicase1.csv`.

In [None]:
# Write the trimmed table to disk without the row index.
# NOTE(review): this runs before the dropna cell that follows, so the file can
# still contain rows with missing values — confirm this is intended.
df.to_csv(path_or_buf='replicase1.csv', index=False)

In [None]:
# Discard every record that lacks a value in any of the listed columns,
# then display the remaining rows.
required_columns = ['standard_type', 'standard_value', 'type', 'units', 'value']
df = df.dropna(subset=required_columns)
df

Unnamed: 0,canonical_smiles,molecule_chembl_id,molecule_pref_name,standard_type,standard_units,standard_value,target_chembl_id,target_organism,target_pref_name,type,units,value
0,CCOC(=O)/C=C/[C@H](C[C@@H]1CCNC1=O)NC(=O)[C@@H...,CHEMBL194398,,IC50,nM,870000.0,CHEMBL5118,SARS coronavirus,Replicase polyprotein 1ab,IC50,uM,870.0
1,CCOC(=O)/C=C/[C@H](C[C@@H]1CCNC1=O)NC(=O)[C@H]...,CHEMBL393608,,IC50,nM,200000.0,CHEMBL5118,SARS coronavirus,Replicase polyprotein 1ab,IC50,uM,200.0
2,CCOC(=O)/C=C/[C@H](C[C@@H]1CCNC1=O)NC(=O)[C@H]...,CHEMBL238216,,IC50,nM,300000.0,CHEMBL5118,SARS coronavirus,Replicase polyprotein 1ab,IC50,uM,300.0
3,CCOC(=O)/C=C/[C@H](C[C@@H]1CCNC1=O)NC(=O)[C@H]...,CHEMBL235873,,IC50,nM,15000.0,CHEMBL5118,SARS coronavirus,Replicase polyprotein 1ab,IC50,uM,15.0
4,CCOC(=O)/C=C/[C@H](C[C@@H]1CCNC1=O)NC(=O)[C@H]...,CHEMBL397154,,IC50,nM,10000.0,CHEMBL5118,SARS coronavirus,Replicase polyprotein 1ab,IC50,uM,10.0
...,...,...,...,...,...,...,...,...,...,...,...,...
210,CC(C)C[C@H](NC(=O)OC1(Cc2ccccc2)CCN(S(C)(=O)=O...,CHEMBL4208764,,IC50,nM,4300.0,CHEMBL5118,SARS coronavirus,Replicase polyprotein 1ab,IC50,uM,4.3
211,CCC1(OC(=O)N[C@@H](CC(C)C)C(=O)N[C@H](C=O)C[C@...,CHEMBL4212620,,IC50,nM,5500.0,CHEMBL5118,SARS coronavirus,Replicase polyprotein 1ab,IC50,uM,5.5
212,CCC1(OC(=O)N[C@@H](CC(C)C)C(=O)N[C@@H](C[C@@H]...,CHEMBL4216101,,IC50,nM,4100.0,CHEMBL5118,SARS coronavirus,Replicase polyprotein 1ab,IC50,uM,4.1
213,CCOC(=O)N1CCC(OC(=O)N[C@@H](CC(C)C)C(=O)N[C@H]...,CHEMBL4217568,,IC50,nM,3200.0,CHEMBL5118,SARS coronavirus,Replicase polyprotein 1ab,IC50,uM,3.2
