Skip to content

Commit

Permalink
Merge pull request #166 from indralab/kbs
Browse files Browse the repository at this point in the history
Add new knowledge sources
  • Loading branch information
pagreene committed May 11, 2021
2 parents d33d156 + d077b2a commit 5717000
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 0 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ We include the information from these pre-existing databases:
- [CTD](http://ctdbase.org/)
- [Phospho.ELM](http://phospho.elm.eu.org/)
- [DrugBank](https://www.drugbank.ca/)
- [CONIB](https://pharmacome.github.io/conib/)
- [CRoG](https://github.com/chemical-roles/chemical-roles)
- [DGI](https://www.dgidb.org/)

These databases are retrieved primarily using the tools in `indra.sources`. The
statements extracted from all of these sources are stored and updated in the
Expand Down
67 changes: 67 additions & 0 deletions indra_db/managers/knowledgebase_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,73 @@ def _expanded(stmts):
yield stmt


class DgiManager(KnowledgebaseManager):
    """Knowledgebase manager that retrieves and processes the DGI dataset."""
    name = 'DGI'
    short_name = 'dgi'
    source = 'dgi'

    def _get_statements(self):
        """Return the deduplicated DGI statements.

        The statements come from `indra.sources.dgi`, processed for the
        pinned release '2020-Nov'. They are passed through `_expanded` and
        then deduplicated by `extract_duplicates` using the
        `KeyFunc.mk_and_one_ev_src` key; the duplicates themselves are
        discarded.
        """
        # Imported lazily so the source module is only loaded when this
        # manager actually runs.
        from indra.sources import dgi

        logger.info('Processing DGI from web')
        processor = dgi.process_version('2020-Nov')

        logger.info('Expanding evidences and deduplicating')
        expanded_stmts = list(_expanded(processor.statements))
        deduped_stmts, _duplicates = extract_duplicates(
            expanded_stmts, KeyFunc.mk_and_one_ev_src
        )
        return deduped_stmts


class CrogManager(KnowledgebaseManager):
    """Knowledgebase manager that retrieves and processes the CRoG dataset."""
    name = 'CRoG'
    short_name = 'crog'
    source = 'crog'

    def _get_statements(self):
        """Return the deduplicated CRoG statements.

        The statements come from `indra.sources.crog.process_from_web`.
        They are passed through `_expanded` and then deduplicated by
        `extract_duplicates` using the `KeyFunc.mk_and_one_ev_src` key; the
        duplicates themselves are discarded.
        """
        # Imported lazily so the source module is only loaded when this
        # manager actually runs.
        from indra.sources import crog

        logger.info('Processing CRoG from web')
        processor = crog.process_from_web()

        logger.info('Expanding evidences and deduplicating')
        expanded_stmts = list(_expanded(processor.statements))
        deduped_stmts, _duplicates = extract_duplicates(
            expanded_stmts, KeyFunc.mk_and_one_ev_src
        )
        return deduped_stmts


class ConibManager(KnowledgebaseManager):
    """This manager handles retrieval and processing of the CONIB dataset."""
    name = 'CONIB'
    short_name = 'conib'
    source = 'bel'

    def _get_statements(self):
        """Return the deduplicated CONIB statements.

        The CONIB node-link JSON is downloaded from GitHub, loaded into a
        PyBEL graph and converted to INDRA statements with
        `process_pybel_graph`. Surrounding whitespace is stripped from the
        PMIDs on each evidence, then the statements are passed through
        `_expanded` and deduplicated by `extract_duplicates` using the
        `KeyFunc.mk_and_one_ev_src` key.

        Raises
        ------
        requests.HTTPError
            If the download responds with an HTTP error status.
        """
        import pybel
        import requests
        from indra.sources.bel import process_pybel_graph

        logger.info('Processing CONIB from web')
        url = ('https://github.com/pharmacome/conib/raw/master/conib'
               '/_cache.bel.nodelink.json')
        res = requests.get(url)
        # Fail with a clear HTTP error instead of an opaque JSON decoding
        # error when the server answers with an error page.
        res.raise_for_status()
        graph = pybel.from_nodelink(res.json())

        # Get INDRA statements
        pbp = process_pybel_graph(graph)

        # Fix an issue with PMID spaces
        for stmt in pbp.statements:
            for ev in stmt.evidence:
                if ev.pmid:
                    ev.pmid = ev.pmid.strip()
                if ev.text_refs.get('PMID'):
                    ev.text_refs['PMID'] = ev.text_refs['PMID'].strip()

        logger.info('Expanding evidences and deduplicating')
        filtered_stmts = list(_expanded(pbp.statements))
        unique_stmts, _ = extract_duplicates(filtered_stmts,
                                             KeyFunc.mk_and_one_ev_src)
        return unique_stmts


if __name__ == '__main__':
import sys
from indra_db.util import get_db
Expand Down

0 comments on commit 5717000

Please sign in to comment.