# Drug name to ChEMBL function

Single function using chembl_webresource_client: https://pypi.org/project/chembl-webresource-client/

Tries 3 different things (in order):

1. Case insensitive match against molecule_dictionary.pref_name
2. Case insensitive match against molecule_synonyms.synonyms
3. Use the Elasticsearch-backed free-text search as a last resort (optional, enabled with `use_search=True`; may return inexact matches)

Note: not every pref_name is also listed in molecule_synonyms, so step 1 cannot be skipped.

A single name can match several ChEMBL compounds. When that happens, manual curation should be considered; this is the reason the SMILES, InChI and InChIKey are returned for every match.

In [None]:
# install the webresource client
!pip install chembl_webresource_client

In [2]:
from chembl_webresource_client.new_client import new_client

def name2chembl(name, use_search=False):
    """
    Try to retrieve the ChEMBL ID and the structure for a given drug name.

    The lookups below are attempted in order and the first one that yields
    at least one molecule wins:

    1. case-insensitive exact match on ``pref_name``;
    2. case-insensitive exact match on a molecule synonym;
    3. only when ``use_search`` is true: the free-text search endpoint, of
       which only the top hit is kept (it may be an inexact match and the
       request can be slow).

    Args:
        name: drug name to look up.
        use_search: whether to fall back to the free-text search when both
            exact lookups come back empty.

    Returns:
        A ``(matches, where)`` tuple, or ``None`` when nothing was found
        (callers must check for ``None`` before unpacking).  ``matches`` is
        a list of ``[chembl_id, pref_name, canonical_smiles, standard_inchi,
        standard_inchi_key]`` rows; the three structure fields are ``None``
        for molecules that carry no structure record.  ``where`` is one of
        ``'pref_name'``, ``'synonyms'`` or ``'search'`` and tells which
        lookup produced the rows.
    """
    # An empty name cannot identify a compound; do not hit the web service.
    if not name:
        return None

    molecule = new_client.molecule

    # iexact does an exact, case-insensitive comparison.
    # pref_name has to be tried before the synonyms because some pref_name
    # values are not listed among the synonyms.
    exact_lookups = (
        ('pref_name', {'pref_name__iexact': name}),
        ('synonyms', {'molecule_synonyms__molecule_synonym__iexact': name}),
    )
    for where, criteria in exact_lookups:
        records = list(molecule.filter(**criteria))
        if records:
            return [_molecule_row(record) for record in records], where

    if use_search:
        # Last resort: the search function uses elastic, can return inexact
        # matches and can take a while.  Only the first hit is pulled from
        # the result set; an empty result set yields None instead of an
        # indexing error.
        top_hit = next(iter(molecule.search(name)), None)
        if top_hit is not None:
            return [_molecule_row(top_hit)], 'search'

    return None


def _molecule_row(record):
    """Flatten one molecule record into the row format used by name2chembl."""
    # molecule_structures can be null (no structure attached to the entry);
    # fall back to an empty dict so the structure columns become None
    # instead of raising TypeError.
    structures = record['molecule_structures'] or {}
    return [record['molecule_chembl_id'],
            record['pref_name'],
            structures.get('canonical_smiles'),
            structures.get('standard_inchi'),
            structures.get('standard_inchi_key')]

# Example getting matches from molecule_dictionary.pref_name

In [3]:
# 'sildenafil' is itself a preferred name, so the first lookup step answers
# and `where` comes back as 'pref_name'.
matches, where = name2chembl('sildenafil', use_search=True)

# `where` names the lookup step that produced the hits; `matches` holds one
# [chembl_id, pref_name, smiles, inchi, inchi_key] row per compound.
print(where)
matches

pref_name


[['CHEMBL192',
  'SILDENAFIL',
  'CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12',
  'InChI=1S/C22H30N6O4S/c1-5-7-17-19-20(27(4)25-17)22(29)24-21(23-19)16-14-15(8-9-18(16)32-6-2)33(30,31)28-12-10-26(3)11-13-28/h8-9,14H,5-7,10-13H2,1-4H3,(H,23,24,29)',
  'BNRNXUUZRGQAQC-UHFFFAOYSA-N']]

# Example getting matches from molecule_synonyms.synonyms

In [4]:
# 'viagra' is not a preferred name, so the lookup falls through to the
# synonym step and `where` comes back as 'synonyms'.
matches, where = name2chembl('viagra', use_search=True)

# A synonym can belong to several compounds: here both sildenafil and its
# citrate salt are returned, one row each.
print(where)
matches

synonyms


[['CHEMBL192',
  'SILDENAFIL',
  'CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12',
  'InChI=1S/C22H30N6O4S/c1-5-7-17-19-20(27(4)25-17)22(29)24-21(23-19)16-14-15(8-9-18(16)32-6-2)33(30,31)28-12-10-26(3)11-13-28/h8-9,14H,5-7,10-13H2,1-4H3,(H,23,24,29)',
  'BNRNXUUZRGQAQC-UHFFFAOYSA-N'],
 ['CHEMBL1737',
  'SILDENAFIL CITRATE',
  'CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12.O=C(O)CC(O)(CC(=O)O)C(=O)O',
  'InChI=1S/C22H30N6O4S.C6H8O7/c1-5-7-17-19-20(27(4)25-17)22(29)24-21(23-19)16-14-15(8-9-18(16)32-6-2)33(30,31)28-12-10-26(3)11-13-28;7-3(8)1-6(13,5(11)12)2-4(9)10/h8-9,14H,5-7,10-13H2,1-4H3,(H,23,24,29);13H,1-2H2,(H,7,8)(H,9,10)(H,11,12)',
  'DEIYFTQMQPDXOT-UHFFFAOYSA-N']]

# Example getting a match using the search feature

In [5]:
# 'Azaguanine-8' matches neither a preferred name nor a synonym exactly, so
# with use_search=True the free-text search is used and `where` is 'search'.
# NOTE: name2chembl returns None when nothing is found at all; this tuple
# unpacking would then raise TypeError, so check the result first when the
# name is not known to resolve.
matches, where = name2chembl('Azaguanine-8', use_search=True)

# The search step keeps only its top hit, hence the single row.
print(where)
matches

search


[['CHEMBL374107',
  '8-AZAGUANINE',
  'Nc1nc(O)c2[nH]nnc2n1',
  'InChI=1S/C4H4N6O/c5-4-6-2-1(3(11)7-4)8-10-9-2/h(H4,5,6,7,8,9,10,11)',
  'LPXQRXLUHJKZIE-UHFFFAOYSA-N']]