Skip to content

Commit

Permalink
Implement some search functions
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Apr 30, 2018
1 parent 66a3b50 commit a5569b1
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 18 deletions.
59 changes: 53 additions & 6 deletions src/bio2bel_hgnc/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@
import time
from collections import Counter

from tqdm import tqdm

from bio2bel.abstractmanager import AbstractManager
from pybel import BELGraph, to_bel
from pybel.constants import FUNCTION, GENE, IDENTIFIER, IS_A, NAME, NAMESPACE, NAMESPACE_DOMAIN_GENE, PROTEIN, RNA
from pybel.dsl import gene as gene_dsl, protein as protein_dsl, rna as rna_dsl
from pybel.manager.models import Namespace, NamespaceEntry
from pybel.resources import get_latest_arty_namespace, write_namespace
from pybel.resources.arty import get_today_arty_knowledge, get_today_arty_namespace
from pybel.resources.deploy import deploy_knowledge, deploy_namespace
from tqdm import tqdm

from bio2bel.abstractmanager import AbstractManager
from .constants import GENE_FAMILY_KEYWORD, MODULE_NAME, encodings
from .models import Base, GeneFamily, HumanGene, MouseGene, RatGene, UniProt
from .wrapper import BaseManager
Expand Down Expand Up @@ -235,7 +235,19 @@ def get_gene_by_mgi_id(self, mgi_id):
:param str mgi_id: MGI identifier
:rtype: Optional[bio2bel_hgnc.models.HumanGene]
"""
raise NotImplementedError
results = self.mgd(mgdid=mgi_id)
mouse_gene = _deal_with_nonsense(results)

if mouse_gene is None:
return

human_genes = mouse_gene.hgncs

if len(human_genes) > 1:
log.warning('multiple human genes mapped to mgi_id:%s: %s', mgi_id, human_genes)
return

return human_genes[0]

def get_gene_by_mgi_symbol(self, mgi_symbol):
"""Gets a HGNC gene by an orthologous MGI gene symbol
Expand All @@ -253,7 +265,19 @@ def get_gene_by_rgd_id(self, rgd_id):
:param str rgd_id: RGD identifier
:rtype: Optional[bio2bel_hgnc.models.HumanGene]
"""
raise NotImplementedError
results = self.rgd(rgdid=rgd_id)
rat_gene = _deal_with_nonsense(results)

if rat_gene is None:
return

human_genes = rat_gene.hgncs

if len(human_genes) > 1:
log.warning('multiple human genes mapped to rgd_id:%s: %s', rgd_id, human_genes)
return

return human_genes[0]

def get_gene_by_rgd_symbol(self, rgd_symbol):
"""Gets a HGNC gene by an orthologous RGD gene symbol
Expand Down Expand Up @@ -303,13 +327,23 @@ def get_node(self, graph, node):
return self.get_gene_by_mgi_symbol(name)
raise KeyError

if namespace == 'MGIID':
if name is None:
raise KeyError
return self.get_gene_by_mgi_id(name)

if namespace in {'RGD'}:
if identifer is not None:
return self.get_gene_by_rgd_id(identifer)
elif name is not None:
return self.get_gene_by_rgd_symbol(name)
raise KeyError

if namespace == 'RGDID':
if name is None:
raise KeyError
return self.get_gene_by_rgd_id(name)

def enrich_genes_with_families(self, graph):
"""Enrich genes in the BEL graph with their families
Expand Down Expand Up @@ -616,7 +650,20 @@ def get_all_hgnc_symbols_family(self):
return set(res)

def add_central_dogma(self, graph, node):
raise NotImplementedError
"""
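Look up the HGNC gene for a node and add central dogma edges (implementation still incomplete).

:param graph: A BEL graph that must already contain ``node``
:param node: The node to resolve with ``get_node``
:raises ValueError: if ``node`` is not in ``graph``
:return: None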
"""
if node not in graph:
raise ValueError

human_gene = self.get_node(graph, node)
encoding = encodings.get(human_gene.locus_type, 'GRP')

if 'R' in encoding:
graph.add_unqualified_edge(node, )

def add_node_equivalencies(self, graph, node, add_leaves=True):
"""Given an HGNC node, add equivalencies found in the database.
Expand Down
25 changes: 13 additions & 12 deletions tests/test_enrich_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def test_get_hgnc_id_node(self):
cd33_model = get_node(graph, cd33_tuple, connection=self.manager)
self.help_check_cd33_model(cd33_model)

@unittest.skip('HGNC does not have RGD symbols')
def test_get_rgd_node(self):
graph = BELGraph()

Expand All @@ -109,6 +110,7 @@ def test_get_rgd_id_node(self):
cd33_model = get_node(graph, cd33_tuple, connection=self.manager)
self.help_check_cd33_model(cd33_model)

@unittest.skip('HGNC does not have MGI symbol information')
def test_get_mgi_node(self):
graph = BELGraph()

Expand Down Expand Up @@ -140,18 +142,17 @@ def test_add_metadata(self):
graph = BELGraph()

cd33_test = protein(namespace='HGNC', name='CD33')
cd33_tuple = graph.add_node_from_data(cd33_test)
graph.add_node_from_data(cd33_test)

self.assertIn(cd33_tuple, graph, msg='Graph is missing CD33 protein node')
self.assertIsNone(graph.get_node_label(cd33_tuple), msg='CD33 should not have label information')
self.assertIsNone(graph.get_node_identifier(cd33_tuple), msg='CD33 should not have identifier information')
self.assertIn(cd33_test.as_tuple(), graph, msg='Graph is missing CD33 protein node')
self.assertIsNone(graph.get_node_label(cd33_test.as_tuple()), msg='CD33 should not have label information')
self.assertIsNone(graph.get_node_identifier(cd33_test.as_tuple()), msg='CD33 should not have identifier information')

add_metadata(graph, cd33_tuple, manager=self.manager)
add_metadata(graph, cd33_test.as_tuple(), manager=self.manager)

self.assertIn(cd33_tuple, graph, msg='Graph somehow lost CD33 protein node')
self.assertIn(cd33_test.as_tuple(), graph, msg='Graph somehow lost CD33 protein node')

self.assertEqual('CD33 molecule', graph.get_node_label(cd33_tuple), msg='Graph should be enriched with label')
self.assertEqual('1659', graph.get_node_identifier(cd33_tuple), msg='Graph should be enriched with identifier')
self.assertEqual('1659', graph.get_node_identifier(cd33_test.as_tuple()), msg='Graph should be enriched with identifier')

def test_add_equivalency(self):
graph = BELGraph()
Expand Down Expand Up @@ -249,7 +250,7 @@ def test_add_mirna(self):
self.assertEqual(1, graph.number_of_nodes())
self.assertEqual(0, graph.number_of_edges())

add_node_central_dogma(graph, mir489_gene_tuple)
add_node_central_dogma(graph, mir489_gene_tuple, connection=self.manager)

self.assertEqual(2, graph.number_of_nodes())
self.assertEqual(1, graph.number_of_edges())
Expand All @@ -264,12 +265,12 @@ def test_add_mirna(self):
def test_add_rna(self):
graph = BELGraph()
mir503hg_gene = gene(namespace='HGNC', name='MIR503HG', identifier='28258')
mir503hg_gene_tuple = graph.add_node_from_data(mir503hg_gene)
graph.add_node_from_data(mir503hg_gene)

self.assertEqual(1, graph.number_of_nodes())
self.assertEqual(0, graph.number_of_edges())

add_node_central_dogma(graph, mir503hg_gene_tuple)
add_node_central_dogma(graph, mir503hg_gene.as_tuple(), connection=self.manager)

self.assertEqual(2, graph.number_of_nodes())
self.assertEqual(1, graph.number_of_edges())
Expand All @@ -289,7 +290,7 @@ def test_add_protein(self):
self.assertEqual(1, graph.number_of_nodes())
self.assertEqual(0, graph.number_of_edges())

add_node_central_dogma(graph, cd33_gene_tuple)
add_node_central_dogma(graph, cd33_gene_tuple, connection=self.manager)

self.assertEqual(3, graph.number_of_nodes())
self.assertEqual(2, graph.number_of_edges())
Expand Down

0 comments on commit a5569b1

Please sign in to comment.