### Finding small molecule inhibitors for COVID-19 Disease Map (DM) pathways

Fetch the relevant pathway models from MINERVA, and extract the model elements

In [1]:
from covid_19.disease_maps.minerva_client import *
from covid_19.disease_maps.id_mapping_minerva import *

# Some basic elements of the COVID-19 DM.
# NOTE(review): get_config, get_project_id_from_config, get_models and
# default_map_name are not defined in this notebook; presumably they are
# provided by the star imports above -- confirm.
config = get_config(default_map_name)
project_id = get_project_id_from_config(config)
# `models` is consumed below as an iterable of dicts from which the
# 'name' and 'idObject' keys are read.
models = get_models(project_id, default_map_name)

In [2]:
# Show the names of all models returned for the project
[m['name'] for m in models]

['overview',
 'Virus replication cycle',
 'PAMP signalling',
 'Interferon 1 pathway',
 'Orf3a protein interactions',
 'TGFbeta signalling',
 'Interferon lambda pathway',
 'Kynurenine synthesis pathway',
 'HMOX1 pathway',
 'Orf10 Cul2 pathway',
 'E protein interactions',
 'SARS-CoV-2 RTC and transcription',
 'JNK pathway',
 'Endoplasmatic Reticulum stress',
 'Apoptosis pathway',
 'Nsp14 and metabolism',
 'Coagulation pathway',
 'Nsp4 and Nsp6 protein interactions',
 'Pyrimidine deprivation',
 'Electron Transport Chain disruption',
 'Renin-angiotensin pathway',
 'Nsp9 protein interactions',
 'NLRP3 inflammasome activation']

In [3]:
# Map each model name to its 'idObject', leaving out the 'overview' entry.
model_ids = {
    model['name']: model['idObject']
    for model in models
    if model['name'] != 'overview'
}

# Fetch the elements of every remaining model, keyed by model name.
model_elements = {
    name: get_model_elements(object_id, project_id, default_map_name)
    for name, object_id in model_ids.items()
}

Now construct the INDRA mappings (i.e., equivalents using INDRA-compatible grounding namespaces and IDs) for these elements

In [4]:
# For every model, convert each element's MINERVA references into INDRA
# db_refs. The result keeps one entry per element, in element order.
indra_mappings = {
    name: [
        indra_db_refs_from_minerva_refs(get_element_references(elem))
        for elem in model_elements[name]
    ]
    for name in model_ids
}

INFO: [2021-03-08 22:41:08] indra.ontology.bio.ontology - Loading INDRA bio ontology from cache at /Users/ben/.indra/bio_ontology/1.8/bio_ontology.pkl


In [5]:
# Reduce each non-empty db_refs entry to a single prioritized key and
# collect the distinct keys per model (empty db_refs are skipped).
indra_groundings = {
    name: {
        get_prioritized_db_refs_key(refs)
        for refs in indra_mappings[name]
        if refs
    }
    for name in model_ids
}

In [6]:
# Display the set of grounding keys collected for each model
indra_groundings

{'Virus replication cycle': {('CHEBI', 'CHEBI:135632'),
  ('CHEBI', 'CHEBI:16113'),
  ('CHEBI', 'CHEBI:28815'),
  ('GO', 'GO:0005737'),
  ('GO', 'GO:0005768'),
  ('GO', 'GO:0005783'),
  ('GO', 'GO:0005791'),
  ('GO', 'GO:0005793'),
  ('GO', 'GO:0006412'),
  ('GO', 'GO:0019012'),
  ('GO', 'GO:0019013'),
  ('GO', 'GO:0031982'),
  ('GO', 'GO:0039718'),
  ('GO', 'GO:0070992'),
  ('GO', 'GO:0071360'),
  ('HGNC', '11876'),
  ('HGNC', '13557'),
  ('HGNC', '2527'),
  ('HGNC', '2537'),
  ('HGNC', '8004'),
  ('HGNC', '8568'),
  ('UP', 'P0DTC1'),
  ('UP', 'P0DTC2'),
  ('UP', 'P0DTC3'),
  ('UP', 'P0DTC4'),
  ('UP', 'P0DTC5'),
  ('UP', 'P0DTC6'),
  ('UP', 'P0DTC7'),
  ('UP', 'P0DTC8'),
  ('UP', 'P0DTC9'),
  ('UP', 'P0DTD1'),
  ('UP', 'P0DTD2'),
  ('UP', 'P0DTD3'),
  ('UP', 'P0DTD8'),
  (None, None)},
 'PAMP signalling': {('GO', 'GO:0005634'),
  ('GO', 'GO:0005737'),
  ('GO', 'GO:0005768'),
  ('HGNC', '10019'),
  ('HGNC', '11562'),
  ('HGNC', '11584'),
  ('HGNC', '11849'),
  ('HGNC', '12030'),
  ('H

Filter down to all the human genes

In [7]:
# Keep only the groundings whose namespace (first tuple item) is 'HGNC'.
human_genes = {
    name: {
        grounding
        for grounding in indra_groundings[name]
        if grounding[0] == 'HGNC'
    }
    for name in model_ids
}

In [8]:
# Print the number of distinct HGNC-grounded genes found in each model
for name in human_genes:
    print(name, len(human_genes[name]))

Virus replication cycle 6
PAMP signalling 45
Interferon 1 pathway 47
Orf3a protein interactions 46
TGFbeta signalling 23
Interferon lambda pathway 50
Kynurenine synthesis pathway 39
HMOX1 pathway 53
Orf10 Cul2 pathway 9
E protein interactions 26
SARS-CoV-2 RTC and transcription 0
JNK pathway 15
Endoplasmatic Reticulum stress 71
Apoptosis pathway 22
Nsp14 and metabolism 70
Coagulation pathway 57
Nsp4 and Nsp6 protein interactions 40
Pyrimidine deprivation 26
Electron Transport Chain disruption 213
Renin-angiotensin pathway 21
Nsp9 protein interactions 90
NLRP3 inflammasome activation 15


We can now turn to the INDRA DB and find Inhibition statements targeting each of these human genes in the pathways of interest. We keep only regulators that are grounded to small-molecule namespaces, and we also filter out statements that have been curated as incorrect.

In [9]:
from indra.sources import indra_db_rest
from indra.tools import assemble_corpus as ac
# Curations are optional: loading them needs the indra_db package and a
# reachable 'primary' database. If anything goes wrong we still fall back to
# an empty list (i.e. no curation-based filtering), but we warn so that the
# degraded behaviour is visible instead of silent.
try:
    from indra_db import get_db
    from indra_db.client.principal.curation import get_curations
    curs = get_curations(get_db('primary'))
except Exception as err:
    import warnings
    warnings.warn('Could not load curations (%r); statements will not be '
                  'filtered by curation.' % (err,))
    curs = []

In [48]:
import os
import pickle
# Querying the INDRA DB once per target is slow, so the raw query results are
# cached in a local pickle file and reused when that file already exists.
# NOTE: only load an inhibitors.pkl you produced yourself -- unpickling an
# untrusted file can execute arbitrary code.
if not os.path.exists('inhibitors.pkl'):
    # Namespaces whose presence on the subject marks it as a small molecule
    small_molecule_ns = {'CHEBI', 'PUBCHEM', 'CAS', 'CHEMBL', 'DRUGBANK'}
    inhibitors = {}
    for entries in human_genes.values():
        for db_ns, db_id in entries:
            # A gene can appear in several models; query it only once
            if (db_ns, db_id) not in inhibitors:
                processor = indra_db_rest.get_statements(
                    object=f'{db_id}@{db_ns}',
                    stmt_type='Inhibition',
                    ev_limit=200,
                )
                small_mol_stmts = [
                    s for s in processor.statements
                    if small_molecule_ns.intersection(s.subj.db_refs)
                ]
                inhibitors[(db_ns, db_id)] = ac.filter_by_curation(
                    small_mol_stmts, curations=curs)
    with open('inhibitors.pkl', 'wb') as fh:
        pickle.dump(inhibitors, fh)
else:
    with open('inhibitors.pkl', 'rb') as fh:
        inhibitors = pickle.load(fh)

        
from indra.databases.identifiers import ensure_chebi_prefix
from indra.ontology.standardize import standardize_agent_name
from indra.preassembler import Preassembler
# Clean up the statements of every target in place. Only values of existing
# keys are replaced, so the dict can be updated while iterating over it.
for target, raw_stmts in inhibitors.items():
    kept_stmts = []
    for stmt in raw_stmts:
        # Drop all MedScan evidence; a statement left without evidence is
        # discarded entirely.
        stmt.evidence = [ev for ev in stmt.evidence
                         if ev.source_api != 'medscan']
        if stmt.evidence:
            for agent in stmt.real_agent_list():
                if 'CHEBI' not in agent.db_refs:
                    continue
                # Normalize the ChEBI ID first, then standardize the agent's
                # name and refs (only done for agents that carry a ChEBI ref).
                agent.db_refs['CHEBI'] = \
                    ensure_chebi_prefix(agent.db_refs['CHEBI'])
                standardize_agent_name(agent, standardize_refs=True)
            kept_stmts.append(stmt)
    # Merge duplicate statements and store the result back under the target
    inhibitors[target] = Preassembler(None).combine_duplicate_stmts(kept_stmts)

We can now check how many of the targets have at least one small molecule inhibitor

In [49]:
# Count the targets whose list of inhibitor statements is non-empty
has_inhibitors = sum(1 for inhs in inhibitors.values() if inhs)
print('We found inhibitors for %d of %d targets' % (has_inhibitors, len(inhibitors)))

We found inhibitors for 634 of 802 targets


In [50]:
# Evidence sources (source_api values) that are text-mining readers. Evidence
# from any other source counts as database support below.
readers = {'reach', 'sparser', 'trips', 'isi', 'rlimsp', 'eidos', 'medscan'}


def get_top_inhibitors(inhs, top_n=5):
    """Return the best-supported inhibitor statements, split by support type.

    A statement has "db support" if at least one of its evidences comes from
    a source that is not listed in ``readers``; otherwise (including when it
    has no evidence at all) it has "no db support". Within each group the
    statements are ordered by decreasing number of evidences, with ties kept
    in their input order.

    Parameters
    ----------
    inhs : iterable
        Statements, each exposing an ``evidence`` list whose items have a
        ``source_api`` attribute.
    top_n : int or None
        Maximum number of statements to keep per group. Defaults to 5, the
        previously hard-coded cut-off; None keeps all statements.

    Returns
    -------
    dict
        ``{'db_support': [...], 'no_db_support': [...]}``.
    """
    with_db_support = []
    reader_only = []
    # Single pass: compute each statement's source set once and route it
    for stmt in inhs:
        sources = {ev.source_api for ev in stmt.evidence}
        if sources - readers:
            with_db_support.append(stmt)
        else:
            reader_only.append(stmt)

    def evidence_count(stmt):
        return len(stmt.evidence)

    # list.sort is stable, so equally supported statements keep input order
    with_db_support.sort(key=evidence_count, reverse=True)
    reader_only.sort(key=evidence_count, reverse=True)
    return {'db_support': with_db_support[:top_n],
            'no_db_support': reader_only[:top_n]}
    

In [51]:
# Select the top statements (with and without database support) per target
top_inhibitors = {
    gene: get_top_inhibitors(gene_inhs)
    for gene, gene_inhs in inhibitors.items()
}

In [52]:
# Display the selected top inhibitor statements for every target
top_inhibitors

{('HGNC', '11876'): {'db_support': [],
  'no_db_support': [Inhibition(camostat(), TMPRSS2()),
   Inhibition(drug(), TMPRSS2()),
   Inhibition(nafamostat(), TMPRSS2()),
   Inhibition(bromhexine(), TMPRSS2()),
   Inhibition(chloroquine(), TMPRSS2())]},
 ('HGNC',
  '8568'): {'db_support': [Inhibition((R)-noradrenaline(), FURIN()),
   Inhibition(decanoic acid(), FURIN()),
   Inhibition(pirfenidone(), FURIN()),
   Inhibition(2-amino-2-deoxy-D-glucopyranose(), FURIN()),
   Inhibition(dopaminoquinone(), FURIN())], 'no_db_support': [Inhibition(iron atom(), FURIN()),
   Inhibition(citric acid(), FURIN()),
   Inhibition(acenaphthoquinone(), FURIN()),
   Inhibition(luteolin(), FURIN()),
   Inhibition(ketone(), FURIN())]},
 ('HGNC',
  '13557'): {'db_support': [Inhibition(enalapril(), ACE2()),
   Inhibition(D-CAPTOPRIL(), ACE2()),
   Inhibition(ORE-1001(), ACE2()),
   Inhibition(enalaprilat (anhydrous)(), ACE2()),
   Inhibition(SPP1148(), ACE2())], 'no_db_support': [Inhibition(aldosterone(), ACE2()

In [53]:
import pandas
# Tab-separated tractability bucket table from the Open Targets 21.02
# data release (release and file date as given in the URL).
open_targets_url = (
    'https://storage.googleapis.com/open-targets-data-releases/'
    '21.02/input/annotation-files/tractability_buckets-2021-01-12.tsv'
)
df = pandas.read_csv(open_targets_url, sep='\t')

# Columns whose values are combined into a single score per target
cols = [
    'Top_bucket_PROTAC',
    'Top_bucket_ab',
    'Top_bucket_othercl',
    'Top_bucket_sm',
]
# Score of a target = the lowest value found across the selected bucket
# columns of its row, keyed by the row's 'symbol'.
# This is computed as a vectorised row-wise minimum rather than with
# iterrows() + builtin min(): it avoids a slow Python-level loop, and missing
# values (NaN) are skipped instead of making the result depend on the order
# of the columns. If a symbol occurs in several rows, the last row wins, as
# it did with the original loop.
target_scores = dict(zip(df['symbol'], df[cols].min(axis=1)))

In [54]:
from collections import Counter
from indra.databases import identifiers
from indra.statements.agent import default_ns_order
from indra.databases import hgnc_client
# Namespace priority used when picking a drug's grounding: INDRA's default
# order followed by additional small-molecule namespaces. Each namespace must
# be its own list item -- without the comma, 'CAS' 'CHEMBL' is implicitly
# concatenated into the non-existent namespace 'CASCHEMBL'.
ns_order = default_ns_order + ['DRUGBANK', 'PUBCHEM', 'CAS', 'CHEMBL']

def get_table_rows(target, stmts_by_support):
    """Build the table rows describing the inhibitors of one target.

    Parameters
    ----------
    target : tuple
        Grounding of the target; only its second item is used, as an HGNC ID.
    stmts_by_support : dict
        Statements keyed by 'db_support' and 'no_db_support'.

    Returns
    -------
    list of list
        One row per statement, 'db_support' statements first, with columns:
        target symbol, target ID, drug name, drug grounding, support type,
        evidence counts per source, and up to 10 PMIDs.
    """
    hgnc_id = target[1]
    target_symbol = hgnc_client.get_hgnc_name(hgnc_id)
    target_id = '%s:%s' % ('hgnc', hgnc_id)

    table_rows = []
    for support_type in ('db_support', 'no_db_support'):
        for stmt in stmts_by_support[support_type]:
            drug = stmt.subj
            drug_ns, drug_id = drug.get_grounding(ns_order)
            drug_grounding = ''
            if drug_ns:
                # Use the namespace returned by get_identifiers_ns when there
                # is one, otherwise keep the original namespace
                drug_ns = identifiers.get_identifiers_ns(drug_ns) or drug_ns
                drug_grounding = '%s:%s' % (drug_ns, drug_id)

            # Distinct, non-empty PMIDs in sorted order, capped at 10
            unique_pmids = {ev.pmid for ev in stmt.evidence if ev.pmid}
            pmids_str = ', '.join(sorted(unique_pmids)[:10])

            # Evidence count per source, most frequent source first
            source_counts = Counter(ev.source_api for ev in stmt.evidence)
            sources_str = ', '.join('%s:%s' % pair
                                    for pair in source_counts.most_common())

            table_rows.append([target_symbol, target_id, drug.name,
                               drug_grounding, support_type, sources_str,
                               pmids_str])
    return table_rows

def target_sort_key(target):
    """Return the sort key of a target: its score in ``target_scores``.

    The target's second item is resolved to a gene symbol via hgnc_client;
    targets whose symbol has no entry in ``target_scores`` get 10.
    NOTE(review): 10 presumably ranks unscored targets last -- confirm
    against the range of the bucket values.
    """
    symbol = hgnc_client.get_hgnc_name(target[1])
    if symbol in target_scores:
        return target_scores[symbol]
    return 10

# csv is not imported anywhere else in this notebook, so import it here to
# make the cell work on a fresh kernel.
import csv

# Collect the rows of all targets, ordered by ascending target score
all_rows = []
for target, inh_stmts in sorted(top_inhibitors.items(),
                                key=lambda x: target_sort_key(x[0])):
    all_rows += get_table_rows(target, inh_stmts)

# The csv module requires files to be opened with newline=''; otherwise
# extra blank lines can appear between rows on some platforms.
with open('inhibitors.csv', 'w', newline='') as fh:
    writer = csv.writer(fh)
    writer.writerows(all_rows)

In [55]:
from indra.assemblers.html import HtmlAssembler
from indra.ontology.bio import bio_ontology
# Write one HTML page per target and, along the way, gather every statement
# so that a combined page can be written afterwards.
all_stmts = []
for gene_grounding, inhs_by_support in top_inhibitors.items():
    # Statements with database support come first, then reader-only ones
    inhs = [*inhs_by_support['db_support'],
            *inhs_by_support['no_db_support']]
    ha = HtmlAssembler(inhs, db_rest_url='https://db.indra.bio')
    ha.make_model()
    # The page is named after the gene name looked up in the bio ontology
    gene_name = bio_ontology.get_name(*gene_grounding)
    ha.save_model(f'{gene_name}.html')
    all_stmts.extend(inhs)

# Combined page covering the inhibitors of all targets
ha = HtmlAssembler(all_stmts, db_rest_url='https://db.indra.bio')
ha.make_model()
ha.save_model('all_inhibitors.html')

INFO: [2021-03-08 23:07:51] indra.assemblers.html.assembler - Removing PUBCHEM from refs due to too many matches: {'68810458', '9863776'}
INFO: [2021-03-08 23:07:52] indra.assemblers.html.assembler - Removing PUBCHEM from refs due to too many matches: {'53481561', '4971'}
INFO: [2021-03-08 23:07:53] indra.assemblers.html.assembler - Removing CHEBI from refs due to too many matches: {'CHEBI:137113', 'CHEBI:95080'}
INFO: [2021-03-08 23:07:53] indra.assemblers.html.assembler - Removing PUBCHEM from refs due to too many matches: {'6914273', '2952'}
INFO: [2021-03-08 23:07:55] indra.assemblers.html.assembler - Removing PUBCHEM from refs due to too many matches: {'439542', '10909430'}
INFO: [2021-03-08 23:07:56] indra.assemblers.html.assembler - Removing PUBCHEM from refs due to too many matches: {'439153', '928'}
INFO: [2021-03-08 23:07:57] indra.assemblers.html.assembler - Removing PUBCHEM from refs due to too many matches: {'439153', '928'}
INFO: [2021-03-08 23:07:57] indra.assemblers.htm

In [56]:
# Now dump for all inhibitors in a single file.
# NOTE(review): these three statements are identical to the last three of the
# preceding cell, so running this cell regenerates all_inhibitors.html from
# the same `all_stmts` -- consider keeping only one copy.
ha = HtmlAssembler(all_stmts, db_rest_url='https://db.indra.bio')
ha.make_model()
ha.save_model(f'all_inhibitors.html')

INFO: [2021-03-08 23:13:23] indra.assemblers.html.assembler - Removing CHEBI from refs due to too many matches: {'CHEBI:137113', 'CHEBI:95080'}
INFO: [2021-03-08 23:13:23] indra.assemblers.html.assembler - Removing CAS from refs due to too many matches: {'116314-67-1', '130929-57-6'}
INFO: [2021-03-08 23:13:23] indra.assemblers.html.assembler - Removing PUBCHEM from refs due to too many matches: {'53481561', '4971'}
INFO: [2021-03-08 23:13:24] indra.assemblers.html.assembler - Removing PUBCHEM from refs due to too many matches: {'6914273', '2952'}
INFO: [2021-03-08 23:13:24] indra.assemblers.html.assembler - Removing PUBCHEM from refs due to too many matches: {'68810458', '9863776'}
INFO: [2021-03-08 23:13:24] indra.assemblers.html.assembler - Removing PUBCHEM from refs due to too many matches: {'439542', '10909430'}
INFO: [2021-03-08 23:13:24] indra.assemblers.html.assembler - Removing PUBCHEM from refs due to too many matches: {'439153', '928'}
INFO: [2021-03-08 23:13:24] indra.assem