In [None]:
!pip install chembl_webresource_client

In [1]:
from chembl_webresource_client.new_client import new_client
from tqdm import tqdm

# Get Dengue Molecules

In [2]:
# Get the target information for 'dengue'
target = new_client.target
target_query = target.search('dengue')
dengue_targets = target_query.all()
dengue_targets

[{'cross_references': [], 'organism': 'Dengue virus', 'pref_name': 'Dengue virus', 'score': 15.0, 'species_group_flag': False, 'target_chembl_id': 'CHEMBL613757', 'target_components': [], 'target_type': 'ORGANISM', 'tax_id': 12637}, {'cross_references': [], 'organism': 'Dengue virus 1', 'pref_name': 'Dengue virus 1', 'score': 13.0, 'species_group_flag': False, 'target_chembl_id': 'CHEMBL613360', 'target_components': [], 'target_type': 'ORGANISM', 'tax_id': 11053}, {'cross_references': [], 'organism': 'Dengue virus 2', 'pref_name': 'Dengue virus 2', 'score': 13.0, 'species_group_flag': False, 'target_chembl_id': 'CHEMBL613966', 'target_components': [], 'target_type': 'ORGANISM', 'tax_id': 11060}, {'cross_references': [], 'organism': 'Dengue virus 3', 'pref_name': 'Dengue virus 3', 'score': 13.0, 'species_group_flag': False, 'target_chembl_id': 'CHEMBL612717', 'target_components': [], 'target_type': 'ORGANISM', 'tax_id': 11069}, '...(remaining elements truncated)...']

In [3]:
# Print all targets
for t in dengue_targets:
    print(t)

{'cross_references': [], 'organism': 'Dengue virus', 'pref_name': 'Dengue virus', 'score': 15.0, 'species_group_flag': False, 'target_chembl_id': 'CHEMBL613757', 'target_components': [], 'target_type': 'ORGANISM', 'tax_id': 12637}
{'cross_references': [], 'organism': 'Dengue virus 1', 'pref_name': 'Dengue virus 1', 'score': 13.0, 'species_group_flag': False, 'target_chembl_id': 'CHEMBL613360', 'target_components': [], 'target_type': 'ORGANISM', 'tax_id': 11053}
{'cross_references': [], 'organism': 'Dengue virus 2', 'pref_name': 'Dengue virus 2', 'score': 13.0, 'species_group_flag': False, 'target_chembl_id': 'CHEMBL613966', 'target_components': [], 'target_type': 'ORGANISM', 'tax_id': 11060}
{'cross_references': [], 'organism': 'Dengue virus 3', 'pref_name': 'Dengue virus 3', 'score': 13.0, 'species_group_flag': False, 'target_chembl_id': 'CHEMBL612717', 'target_components': [], 'target_type': 'ORGANISM', 'tax_id': 11069}
{'cross_references': [], 'organism': 'Dengue virus 4', 'pref_nam

In [None]:
# Search for molecules that are active against the identified targets
activities = new_client.activity
molecules = []

for t in dengue_targets:
    activity_query = activities.filter(target_chembl_id=t['target_chembl_id']).filter(standard_type="IC50")

    for act in activity_query:
        molecules.append(act)



In [None]:
molecules

[{'action_type': None,
  'activity_comment': None,
  'activity_id': 2927199,
  'activity_properties': [],
  'assay_chembl_id': 'CHEMBL1041197',
  'assay_description': 'Antiviral activity against Dengue virus',
  'assay_type': 'F',
  'assay_variant_accession': None,
  'assay_variant_mutation': None,
  'bao_endpoint': 'BAO_0000190',
  'bao_format': 'BAO_0000218',
  'bao_label': 'organism-based format',
  'canonical_smiles': 'O=C(O)/C=C/c1ccc(OS(=O)(=O)O)cc1',
  'data_validity_comment': 'Outside typical range',
  'data_validity_description': 'Values for this activity type are unusually large/small, so may not be accurate',
  'document_chembl_id': 'CHEMBL1155220',
  'document_journal': 'J Med Chem',
  'document_year': 2009,
  'ligand_efficiency': None,
  'molecule_chembl_id': 'CHEMBL575429',
  'molecule_pref_name': None,
  'parent_molecule_chembl_id': 'CHEMBL575429',
  'pchembl_value': None,
  'potential_duplicate': 0,
  'qudt_units': 'http://www.openphacts.org/units/Nanomolar',
  'record_

In [None]:
# Convert molecules to pandas dataframe
import pandas as pd

df = pd.DataFrame(molecules)
df.head()

Unnamed: 0,action_type,activity_comment,activity_id,activity_properties,assay_chembl_id,assay_description,assay_type,assay_variant_accession,assay_variant_mutation,bao_endpoint,...,target_organism,target_pref_name,target_tax_id,text_value,toid,type,units,uo_units,upper_value,value
0,,,2927199,[],CHEMBL1041197,Antiviral activity against Dengue virus,F,,,BAO_0000190,...,Dengue virus,Dengue virus,12637,,,IC50,mM,UO_0000065,,2.0
1,,,2927200,[],CHEMBL1041197,Antiviral activity against Dengue virus,F,,,BAO_0000190,...,Dengue virus,Dengue virus,12637,,,IC50,mM,UO_0000065,,2.0
2,,,2927201,[],CHEMBL1041197,Antiviral activity against Dengue virus,F,,,BAO_0000190,...,Dengue virus,Dengue virus,12637,,,IC50,mM,UO_0000065,,2.0
3,,,2927202,[],CHEMBL1041197,Antiviral activity against Dengue virus,F,,,BAO_0000190,...,Dengue virus,Dengue virus,12637,,,IC50,uM,UO_0000065,,294.0
4,,,2927203,[],CHEMBL1041197,Antiviral activity against Dengue virus,F,,,BAO_0000190,...,Dengue virus,Dengue virus,12637,,,IC50,uM,UO_0000065,,46.0


In [None]:
df.to_csv("dengue_molecules_raw.csv", index=False)

In [None]:
df_molecules = df[['canonical_smiles','standard_value','type','units',]]
df_molecules.to_csv("dengue_molecules.csv", index=False)

In [None]:

df_molecules = pd.read_csv("dengue_molecules.csv")
df_molecules

Unnamed: 0,canonical_smiles,standard_value,type,units
0,O=C(O)/C=C/c1ccc(OS(=O)(=O)O)cc1,2000000.0,IC50,mM
1,CN(CCCNC(=O)c1ccc(O)cc1)CCCNC(=O)c1ccc(O)cc1,2000000.0,IC50,mM
2,CCN(CCCN(CC)C(=O)c1ccc(O)cc1)C(=O)c1ccc(O)cc1,2000000.0,IC50,mM
3,CN(CCCNC(=O)c1ccc(O)cc1)CCCNC(=O)c1ccc2cc(O)cc...,294000.0,IC50,uM
4,CCN(CCOC(=O)/C=C/c1ccc(O)cc1)Cc1cc(Cl)ccc1O,46000.0,IC50,uM
...,...,...,...,...
1680,Cc1ccc(S(=O)(=O)Nc2ccc(-c3nnc(SCc4ccc(C(F)(F)F...,25200.0,IC50,uM
1681,Cc1ccc(S(=O)(=O)Nc2ccc(-c3nnc(SCc4cccc(C(F)(F)...,23900.0,IC50,uM
1682,Cc1ccc(S(=O)(=O)Nc2ccc(-c3nnc(SCc4ccccc4C(F)(F...,24000.0,IC50,uM
1683,O=C(N[C@@H](Cc1ccc(O)cc1)C(=O)O)c1cc(-c2ccccc2...,9610.0,IC50,uM


# Generate ECFP4 fingerprints

In [None]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from tqdm import tqdm

In [None]:
# Initialize an empty list to store the ECFP4 fingerprints
ecfp4_fingerprints = []

# Iterate over the DataFrame
for index, row in df_molecules.iterrows():
    mol = Chem.MolFromSmiles(row['canonical_smiles'])
    
    # Ensure the molecule was successfully created
    if mol:
        # Generate the ECFP4 fingerprint as a BitVector
        ecfp4 = AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048)
        # Convert the ECFP4 fingerprint to a list of bits
        ecfp4_bits = list(ecfp4.ToBitString())
        ecfp4_fingerprints.append(ecfp4_bits)
    else:
        ecfp4_fingerprints.append(None)

# Assign the ECFP4 fingerprints to a new column in the DataFrame
df_molecules['ECFP4'] = ecfp4_fingerprints

In [None]:
df_molecules.head()

Unnamed: 0,canonical_smiles,standard_value,type,units,ECFP4
0,O=C(O)/C=C/c1ccc(OS(=O)(=O)O)cc1,2000000.0,IC50,mM,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,CN(CCCNC(=O)c1ccc(O)cc1)CCCNC(=O)c1ccc(O)cc1,2000000.0,IC50,mM,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ..."
2,CCN(CCCN(CC)C(=O)c1ccc(O)cc1)C(=O)c1ccc(O)cc1,2000000.0,IC50,mM,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,CN(CCCNC(=O)c1ccc(O)cc1)CCCNC(=O)c1ccc2cc(O)cc...,294000.0,IC50,uM,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ..."
4,CCN(CCOC(=O)/C=C/c1ccc(O)cc1)Cc1cc(Cl)ccc1O,46000.0,IC50,uM,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."


In [None]:
df_molecules.to_csv("dengue_molecules.csv", index=False)

# Activities

1. **IC50 (Half Maximal Inhibitory Concentration)**:
    - It represents the concentration of a drug or compound required to inhibit a biological process by 50%.
    - Commonly used in drug discovery to measure the potency of a drug against a specific target, such as an enzyme, receptor, or cell.
    - A lower IC50 value indicates higher potency.

2. **EC50 (Half Maximal Effective Concentration)**:
    - It represents the concentration of a drug or compound required to produce 50% of its maximal effect.
    - Used to describe the potency of an agonist or activator.
    - Like IC50, a lower EC50 value indicates higher potency.

3. **Inhibition**:
    - This term generally refers to the reduction or prevention of a biological process by a drug or compound.
    - Inhibition can be competitive (binds to the same site as the substrate), non-competitive (binds to a different site), or uncompetitive (binds only to the enzyme-substrate complex).

4. **EC90 (90% Effective Concentration)**:
    - The concentration of a drug or compound required to produce 90% of its maximal effect.
    - Indicates a higher level of efficacy than EC50.
    - It's less commonly reported than EC50 but can provide a clearer picture of a drug's efficacy at higher concentrations.

5. **IC90 (90% Inhibitory Concentration)**:
    - The concentration of a drug or compound required to inhibit a biological process by 90%.
    - Represents a higher level of inhibition than IC50.
    - It's less commonly reported than IC50 but can be useful to understand the drug's activity at higher concentrations.

6. **Activity**:
    - In the context of drug discovery, activity usually refers to the action or effect of a drug or compound on a biological target.
    - It can describe the level of inhibition, activation, binding, or any other measurable effect.
