In [None]:
! pip install chembl_webresource_client

import pandas as pd
from chembl_webresource_client.new_client import new_client

Endometriosis is associated with progesterone resistance: https://pmc.ncbi.nlm.nih.gov/articles/PMC9687824/.
Therefore, in drug discovery for endometriosis, we are looking for compounds that can activate progesterone receptors and improve binding.

In [None]:
# Query the ChEMBL target endpoint for entries matching the keyword.
target = new_client.target
target_query = target.search("progesterone")

# Tabulate the hits so the target of interest can be picked by row below.
targets = pd.DataFrame.from_dict(target_query)
targets

Using index 0 of the search results to select the protein target associated with *Homo sapiens*.

In [None]:
# Take the ChEMBL id of the first row in the target search results.
target_ids = targets.target_chembl_id
selected = target_ids[0]

# Fetch activity records for that target, restricted to IC50 measurements.
# IC50 means that the potency is measured by the amount needed to halt a
# biological process by 50%: https://pubmed.ncbi.nlm.nih.gov/27365221/
activity = new_client.activity
res = (
    activity
    .filter(target_chembl_id=selected)
    .filter(standard_type="IC50")
)

# A standard value is needed to determine potency, so rows without one are dropped.
all_ic50 = pd.DataFrame.from_dict(res)
has_value = all_ic50.standard_value.notna()
df = all_ic50[has_value]
df.head(3)

In [None]:
# Add a bioactivity class label for ML, derived from each compound's standard_value.
# NOTE(review): the cutoffs presumably assume standard_value is an IC50 in nM —
# confirm against the standard_units column.
INACTIVE_MIN_IC50 = 10000  # standard_value >= this -> "inactive"
ACTIVE_MAX_IC50 = 1000     # standard_value <= this -> "active"

# Each value is coerced to float before being compared against the cutoffs;
# anything strictly between the two cutoffs is labelled "intermediate".
act_class = [
    "inactive" if ic50 >= INACTIVE_MIN_IC50
    else "active" if ic50 <= ACTIVE_MAX_IC50
    else "intermediate"
    for ic50 in map(float, df.standard_value)
]

# Keep only the columns needed for modelling. Canonical SMILES is a text
# encoding of the molecular structure.
finaldf = df[['molecule_chembl_id', 'canonical_smiles', 'standard_value']]

# Build the label Series on finaldf's own index. pd.concat(axis=1) aligns on
# index labels, so a default 0..n-1 index would attach labels to the wrong rows
# (and add NaN-padded rows) whenever finaldf's index has gaps from row filtering.
pd.concat(
    [finaldf, pd.Series(act_class, index=finaldf.index, name="activity_class")],
    axis=1,
)
