## Argonne Covid Database

This notebook will get you started with making requests to the Argonne Covid Database. You will need a few items to get started:

1. A whitelisted IP
    * Currently, only users from LLNL, Argonne, and BNL may make requests to this database.
1. A user account
    * You must ask for a user account before you can make queries.
    
The first few cells will ensure you can access the database. After that, you will be guided through a range of simple queries.

In [1]:
import requests
import json
import urllib3
import getpass
import hashlib

# Host that the connection check and login cells talk to.
# NOTE(review): the helper-function cell further down defines COVID_URL with a
# different host name (covid-ws-01) — confirm which one is current.
COVID_URL = 'https://covid-db-01.alcf.anl.gov'

# Keyword arguments passed along with the `requests` calls in the cells below.
RQARGS = dict(
    # The host only answers select IP addresses, so give up after a few
    # seconds instead of waiting indefinitely.
    timeout=3,
    # Certificates are not yet set up for the host above, so skip verification.
    verify=False,
)

In [2]:
# Probe the host once. A timeout is what a non-whitelisted IP normally sees;
# any other connection failure (refused, DNS, reset) is reported with its own
# message instead of surfacing as a raw traceback.
try:
    requests.get(COVID_URL, **RQARGS)
except requests.exceptions.Timeout:
    print(f'Connection Failed, {COVID_URL} is only accessible '
          'from select ip ranges at LLNL, Argonne, and BNL. Please try again using a different IP.')
except requests.exceptions.ConnectionError as error:
    print(f'Connection Failed, could not reach {COVID_URL}: {error}')
else:
    # Only reached when the request completed without raising.
    print('Connection Successful, please proceed')

Connection Failed, https://covid-ws-01.alcf.anl.gov is only accessible from select ip ranges at LLNL, Argonne, and BNL. Please try again using a different IP.


In [None]:
# Credentials are prompted for interactively so they are never stored in the notebook.
payload = {
    'email': input('Enter your email (EX: person@anl.gov) > '),
    # The login endpoint is sent the password under 'pass', the same field
    # name that get_token() further down in this notebook uses.
    'pass': getpass.getpass(),
}
token = ''
response = requests.post(f'{COVID_URL}/rpc/login', data=payload, **RQARGS)
# Compare the numeric status code; comparing the Response object itself to 200
# is always unequal and would report every login as failed.
if response.status_code != 200:
    try:
        reason = response.json()['message']
    except (ValueError, KeyError, TypeError):
        # The body was not JSON, or had no 'message' entry: show it verbatim.
        reason = response.text
    print(f'Login Failed with {payload["email"]}: {reason}')
    print('Please send the message above to your administrator.')
else:
    print('Authentication Successful, you may now make requests to the database.')
    token = response.json()[0]['token']
# Header reused by the query cells; the token stays empty if the login failed.
headers = {'Authorization': f'bearer {token}'}

In [None]:
# Example lookup: rows of dup_m2k are filtered by the md5 hex digest of a SMILES string.
smile = """CCC(COC(=O)C(NP(=O)(Oc1ccccc1)OCC1OC(C(C1O)O)(C#N)c1ccc2n1ncnc2N)C)CCCCN(CCCC(Nc1ccnc2c1ccc(c2)Cl)C)CCCC(=O)Oc1ccccc1C(=O)Nc1ncc(s1)[N+](=O)[O-]NC(=Nc1ccc(cc1)C(=O)Oc1ccc2c(c1)ccc(c2)C(=N)N)N"""
params = {
    'md5': 'eq.{}'.format(hashlib.md5(smile.encode()).hexdigest())
}
# Reading rows is a GET with the filter in the query string, exactly as the
# helper functions later in this notebook do; POST is not a read on a table endpoint.
requests.get(f'{COVID_URL}/dup_m2k', params=params, headers=headers, **RQARGS).json()

In [6]:
import requests
import urllib3
import hashlib

# PYTHON 3.7
# The target server's SSL certificates currently have a problem, so the
# requests below are made with certificate verification switched off.
# Unverified requests emit an InsecureRequestWarning; silence that warning
# category here.
urllib3.disable_warnings(category=urllib3.exceptions.InsecureRequestWarning)

# Root address that the table and login paths below are appended to.
COVID_URL = 'https://covid-ws-01.alcf.anl.gov'


# ==================
# INTERNAL FUNCTIONS
# ==================

def _get_json(token, table_name, params):
    """Query one table of the database and return the decoded JSON body.

    Args:
        token: Token string returned by get_token().
        table_name: Target of the GET; appended to COVID_URL as the path.
        params: Dict of GET options sent as the query string,
            e.g. {"name": "Daniel", "id": 123456} -> ?name=Daniel&id=123456

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status, so an error body is never handed on as if it were rows.
    """
    url = COVID_URL + '/' + table_name
    # Build the auth header from the token.
    h = {'Authorization': 'Bearer ' + token}
    # Note that we are turning off SSL verification here.
    response = requests.get(url, headers=h, params=params, verify=False)
    if response.status_code != 200:
        print("request failed: {0}".format(response.text))
        # Raises only for 4xx/5xx; any other non-200 status still falls
        # through to the return below.
        response.raise_for_status()
    return response.json()


# Turn a given list or single object into a postgres REST query
# key is the name of the parameter
def _list_to_query(key, objs):
    if not isinstance(objs, list):
        return {key: 'eq.{0}'.format(objs)}
    else:
        query = 'in.('
        for o in objs:
            query += '{0},'.format(o)
        query += ')'
        return {key: query}

    
def _str_to_md5_hex(s):
    # converts a string into a md5 hex string
    return hashlib.md5(s.encode()).hexdigest()


def _to_md5_table(strings):
    """Build an ``md5 hex -> original string`` lookup table.

    ``strings`` may be a list of strings or a single value; anything that is
    not a list is treated as one string.
    """
    entries = strings if isinstance(strings, list) else [strings]
    return {_str_to_md5_hex(entry): entry for entry in entries}


# given the json results from dup_m2i and a dict with md5 values as the keys
# process results keyed by the values of md5_lookup
def _process_m2i_table_data(data, md5_lookup):
    json_r = {}
    for r in data:
        try:
            ids = json_r[md5_lookup[r['md5']]]
        except KeyError:
            param, val = r['ide'].split(':')
            json_r[md5_lookup[r['md5']]] = {param: val}
        else:
            param, val = r['ide'].split(':')
            ids[param] = val
    return json_r


def _process_m2s_table_data(data, md5_lookup, key):
    json_r = []
    for d in data:
        json_r.append({key: md5_lookup[d['md5']], 'smi': d['smi']})
    return json_r


def _process_m2k_table_data(data, md5_lookup, key):
    json_r = []
    for d in data:
        json_r.append({key: md5_lookup[d['md5']], 'key': d['key']})
    return json_r


# Given a list of data turn it into a lookup table of key -> val
def _to_lookup(data, key, val):
    lookup = {}
    for d in data:
        lookup[d[key]] = d[val]
    return lookup


# ==================
# EXTERNAL FUNCTIONS
# ==================
def get_token(email, password):
    """Log in to the endpoint and return the token for later requests.

    Args:
        email: Email of the user accessing the endpoint.
        password: Password of that user.

    Returns:
        The 'token' entry of the first element of the login response.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status, instead of failing later while indexing the error body.
    """
    login = COVID_URL + '/rpc/login'
    payload = {'email': email, 'pass': password}
    # Note that we are turning off SSL verification here.
    response = requests.post(login, data=payload, verify=False)
    if response.status_code != 200:
        print("request failed: {0}".format(response.text))
        # Raises only for 4xx/5xx; any other non-200 status still falls
        # through to the token extraction below.
        response.raise_for_status()
    # Get the json object then pull out the token.
    return response.json()[0]['token']


def smiles2key(token, smi):
    """Look up the key for one SMILES string or a list of SMILES strings.

    Returns a list of ``{'smi': ..., 'key': ...}`` dicts.
    """
    smi_by_md5 = _to_md5_table(smi)
    md5_filter = _list_to_query('md5', list(smi_by_md5))
    rows = _get_json(token, 'dup_m2k', md5_filter)
    return _process_m2k_table_data(rows, smi_by_md5, 'smi')


def smiles2id(token, smi):
    """Look up the identifiers for one SMILES string or a list of them.

    Returns a dict keyed by SMILES string; each value is the dict built by
    _process_m2i_table_data from that string's dup_m2i rows.
    """
    smi_by_md5 = _to_md5_table(smi)
    md5_filter = _list_to_query('md5', list(smi_by_md5))
    rows = _get_json(token, 'dup_m2i', md5_filter)
    return _process_m2i_table_data(rows, smi_by_md5)


def smiles2inchi(token, smi):
    """Look up the 'inc' value for one SMILES string or a list of them.

    Goes SMILES -> key (via smiles2key) and then key -> inc (dup_k2n).
    Returns a list of ``{'smi': ..., 'inc': ...}`` dicts.
    """
    smi_by_key = _to_lookup(smiles2key(token, smi), 'key', 'smi')
    key_filter = _list_to_query('key', list(smi_by_key))
    rows = _get_json(token, 'dup_k2n', key_filter)
    # Post work: swap each row's key back for the SMILES it came from.
    return [{'smi': smi_by_key[row['key']], 'inc': row['inc']} for row in rows]


def key2smiles(token, keys):
    """Look up the SMILES string for one key or a list of keys.

    Goes key -> md5 (dup_m2k) and then md5 -> smi (dup_m2s).
    Returns a list of ``{'key': ..., 'smi': ...}`` dicts.
    """
    md5_rows = _get_json(token, 'dup_m2k', _list_to_query('key', keys))
    # Lookup table md5 -> key, used to label the SMILES rows afterwards.
    key_by_md5 = _to_lookup(md5_rows, 'md5', 'key')
    smi_rows = _get_json(token, 'dup_m2s', _list_to_query('md5', list(key_by_md5)))
    return _process_m2s_table_data(smi_rows, key_by_md5, 'key')


def key2inchi(token, keys):
    """Return the dup_k2n rows matching one key or a list of keys, unprocessed."""
    return _get_json(token, 'dup_k2n', _list_to_query('key', keys))


def key2id(token, keys):
    """Look up the identifiers for one key or a list of keys.

    Goes key -> md5 (dup_m2k) and then md5 -> ide (dup_m2i).
    Returns a dict keyed by key; each value is the dict built by
    _process_m2i_table_data from that key's dup_m2i rows.
    """
    md5_rows = _get_json(token, 'dup_m2k', _list_to_query('key', keys))
    # Lookup table md5 -> key, used to label the identifier rows afterwards.
    key_by_md5 = _to_lookup(md5_rows, 'md5', 'key')
    ide_rows = _get_json(token, 'dup_m2i', _list_to_query('md5', list(key_by_md5)))
    return _process_m2i_table_data(ide_rows, key_by_md5)


def id2smiles(token, ids):
    """Look up the SMILES string for one identifier or a list of identifiers.

    Goes ide -> md5 (dup_m2i) and then md5 -> smi (dup_m2s).
    Returns a list of ``{'ide': ..., 'smi': ...}`` dicts.
    """
    ide_rows = _get_json(token, 'dup_m2i', _list_to_query('ide', ids))
    ide_by_md5 = _to_lookup(ide_rows, 'md5', 'ide')
    smi_rows = _get_json(token, 'dup_m2s', _list_to_query('md5', list(ide_by_md5)))
    return _process_m2s_table_data(smi_rows, ide_by_md5, 'ide')


def id2key(token, ids):
    """Look up the key for one identifier or a list of identifiers.

    Goes ide -> md5 (dup_m2i) and then md5 -> key (dup_m2k).
    Returns a list of ``{'ide': ..., 'key': ...}`` dicts.
    """
    ide_rows = _get_json(token, 'dup_m2i', _list_to_query('ide', ids))
    ide_by_md5 = _to_lookup(ide_rows, 'md5', 'ide')
    key_rows = _get_json(token, 'dup_m2k', _list_to_query('md5', list(ide_by_md5)))
    return _process_m2k_table_data(key_rows, ide_by_md5, 'ide')


def id2inchi(token, ids):
    """Look up the 'inc' value for one identifier or a list of identifiers.

    Goes ide -> key (via id2key) and then key -> inc (dup_k2n).
    Returns a list of ``{'ide': ..., 'inc': ...}`` dicts.
    """
    ide_by_key = _to_lookup(id2key(token, ids), 'key', 'ide')
    key_filter = _list_to_query('key', list(ide_by_key))
    rows = _get_json(token, 'dup_k2n', key_filter)
    # Post work: swap each row's key back for the identifier it came from.
    return [{'ide': ide_by_key[row['key']], 'inc': row['inc']} for row in rows]


def inchi2id(token, inc):
    """Look up the identifiers for one 'inc' value or a list of them.

    Goes inc -> key (dup_k2n) and then key -> identifiers (via key2id).
    Returns the key2id result re-keyed by the 'inc' value of each key.
    """
    k2n_rows = _get_json(token, 'dup_k2n', _list_to_query('inc', inc))
    inc_by_key = _to_lookup(k2n_rows, 'key', 'inc')
    ids_by_key = key2id(token, list(inc_by_key))
    return {inc_by_key[key]: found for key, found in ids_by_key.items()}


def inchi2smiles(token, inc):
    """Look up the SMILES string for one 'inc' value or a list of them.

    Goes inc -> key (dup_k2n) and then key -> smi (via key2smiles).
    Returns a list of ``{'inc': ..., 'smi': ...}`` dicts.
    """
    k2n_rows = _get_json(token, 'dup_k2n', _list_to_query('inc', inc))
    inc_by_key = _to_lookup(k2n_rows, 'key', 'inc')
    smi_rows = key2smiles(token, list(inc_by_key))
    return [{'inc': inc_by_key[row['key']], 'smi': row['smi']} for row in smi_rows]


def inchi2key(token, inc):
    """Return the dup_k2n rows matching one 'inc' value or a list of them, unprocessed."""
    return _get_json(token, 'dup_k2n', _list_to_query('inc', inc))

In [11]:
# Prompt for the credentials instead of hard-coding them, so no account or
# password is ever saved in (or shared with) the notebook.
token = get_token(input('Enter your email (EX: person@anl.gov) > '), getpass.getpass())

In [18]:
# S -> Key: once with a single SMILES string, once with a list of them.
for smiles_query in ('C', ['C', 'CCCCCC']):
    print(smiles2key(token, smiles_query))

[{'smi': 'C', 'key': 'VNWKTOKETHGBQD-UHFFFAOYSA-N'}]
[{'smi': 'C', 'key': 'VNWKTOKETHGBQD-UHFFFAOYSA-N'}, {'smi': 'CCCCCC', 'key': 'VLKZOEOYAKHREP-UHFFFAOYSA-N'}]


In [17]:
# S -> id: once with a single SMILES string, once with a list of them.
for smiles_query in ('C', ['C', 'CCCCCC']):
    print(smiles2id(token, smiles_query))

{'C': {'chm': 'CHEMBL17564', 'qm9': '1', 'pch': 'PC-281', 'mcu': 'MCULE-1431015236'}}
{'C': {'chm': 'CHEMBL17564', 'qm9': '1', 'pch': 'PC-281', 'mcu': 'MCULE-1431015236'}, 'CCCCCC': {'chm': 'CHEMBL15939', 'emo': '299998816-299998816', 'qm9': '543', 'pch': 'PC-7664', 'mcu': 'MCULE-3465692202'}}


In [16]:
# id -> key: once with a single identifier, once with a list of them.
for id_query in ('qm9:1', ['qm9:1', 'qm9:543']):
    print(id2key(token, id_query))

[{'ide': 'qm9:1', 'key': 'VNWKTOKETHGBQD-UHFFFAOYSA-N'}]
[{'ide': 'qm9:1', 'key': 'VNWKTOKETHGBQD-UHFFFAOYSA-N'}, {'ide': 'qm9:543', 'key': 'VLKZOEOYAKHREP-UHFFFAOYSA-N'}]


In [19]:



# Every remaining conversion is shown twice: first with a single value, then
# with a list of values. Each row is (function, single value, list of values).
demo_calls = [
    # S -> inchi
    (smiles2inchi, 'C', ['C', 'CCCCCC']),
    # Key -> smi
    (key2smiles, 'VNWKTOKETHGBQD-UHFFFAOYSA-N',
     ['VNWKTOKETHGBQD-UHFFFAOYSA-N', 'VLKZOEOYAKHREP-UHFFFAOYSA-N']),
    # Key -> INC
    (key2inchi, 'VNWKTOKETHGBQD-UHFFFAOYSA-N',
     ['VNWKTOKETHGBQD-UHFFFAOYSA-N', 'VLKZOEOYAKHREP-UHFFFAOYSA-N']),
    # Key -> Ids
    (key2id, 'VNWKTOKETHGBQD-UHFFFAOYSA-N',
     ['VNWKTOKETHGBQD-UHFFFAOYSA-N', 'VLKZOEOYAKHREP-UHFFFAOYSA-N']),
    # id -> smi
    (id2smiles, 'qm9:1', ['qm9:1', 'qm9:543']),
    # id -> inc
    (id2inchi, 'qm9:1', ['qm9:1', 'qm9:543']),
    # inc -> id
    (inchi2id, 'InChI=1S/CH4/h1H4',
     ['InChI=1S/CH4/h1H4', 'InChI=1S/C6H14/c1-3-5-6-4-2/h3-6H2,1-2H3']),
    # inc -> smi
    (inchi2smiles, 'InChI=1S/CH4/h1H4',
     ['InChI=1S/CH4/h1H4', 'InChI=1S/C6H14/c1-3-5-6-4-2/h3-6H2,1-2H3']),
    # inc -> key
    (inchi2key, 'InChI=1S/CH4/h1H4',
     ['InChI=1S/CH4/h1H4', 'InChI=1S/C6H14/c1-3-5-6-4-2/h3-6H2,1-2H3']),
]
for convert, single_value, value_list in demo_calls:
    print(convert(token, single_value))
    print(convert(token, value_list))

[{'smi': 'C', 'inc': 'InChI=1S/CH4/h1H4'}]
[{'smi': 'CCCCCC', 'inc': 'InChI=1S/C6H14/c1-3-5-6-4-2/h3-6H2,1-2H3'}, {'smi': 'C', 'inc': 'InChI=1S/CH4/h1H4'}]
[{'key': 'VNWKTOKETHGBQD-UHFFFAOYSA-N', 'smi': 'C'}]
[{'key': 'VNWKTOKETHGBQD-UHFFFAOYSA-N', 'smi': 'C'}, {'key': 'VLKZOEOYAKHREP-UHFFFAOYSA-N', 'smi': 'CCCCCC'}]
[{'key': 'VNWKTOKETHGBQD-UHFFFAOYSA-N', 'inc': 'InChI=1S/CH4/h1H4'}]
[{'key': 'VLKZOEOYAKHREP-UHFFFAOYSA-N', 'inc': 'InChI=1S/C6H14/c1-3-5-6-4-2/h3-6H2,1-2H3'}, {'key': 'VNWKTOKETHGBQD-UHFFFAOYSA-N', 'inc': 'InChI=1S/CH4/h1H4'}]
{'VNWKTOKETHGBQD-UHFFFAOYSA-N': {'chm': 'CHEMBL17564', 'qm9': '1', 'pch': 'PC-281', 'mcu': 'MCULE-1431015236'}}
{'VNWKTOKETHGBQD-UHFFFAOYSA-N': {'chm': 'CHEMBL17564', 'qm9': '1', 'pch': 'PC-281', 'mcu': 'MCULE-1431015236'}, 'VLKZOEOYAKHREP-UHFFFAOYSA-N': {'chm': 'CHEMBL15939', 'emo': '299998816-299998816', 'qm9': '543', 'pch': 'PC-7664', 'mcu': 'MCULE-3465692202'}}
[{'ide': 'qm9:1', 'smi': 'C'}]
[{'ide': 'qm9:1', 'smi': 'C'}, {'ide': 'qm9:543', 'sm