Skip to content

Commit

Permalink
Upgrade to PyBEL 14 / Bio2BEL 3 (#10)
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Nov 23, 2019
1 parent 626d7fe commit 2cf8bc2
Show file tree
Hide file tree
Showing 13 changed files with 69 additions and 71 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.2.5-dev
current_version = 0.3.0-dev
commit = True
tag = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?
Expand Down
4 changes: 2 additions & 2 deletions .readthedocs.yml
@@ -1,8 +1,8 @@
# See: https://docs.readthedocs.io/en/latest/yaml-config.html
build:
image: latest
image: latest
python:
version: 3.6
pip_install: true
extra_requirements:
- docs
- docs
3 changes: 1 addition & 2 deletions .travis.yml
Expand Up @@ -2,7 +2,7 @@ sudo: false
cache: pip
language: python
python:
- 3.6
- '3.7'
stages:
- lint
- docs
Expand All @@ -15,7 +15,6 @@ jobs:
- env: TOXENV=flake8
- env: TOXENV=pyroma
- env: TOXENV=xenon
- env: TOXENV=pyroma
# docs stage
- stage: docs
env: TOXENV=doc8
Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Expand Up @@ -54,7 +54,7 @@
copyright = '2017-2018, Charles Tapley Hoyt'
author = 'Charles Tapley Hoyt'

release = '0.2.5-dev'
release = '0.3.0-dev'

parsed_version = re.match(
'(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?',
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Expand Up @@ -24,8 +24,8 @@
'Topic :: Scientific/Engineering :: Bio-Informatics',
]
INSTALL_REQUIRES = [
'pybel>=0.13.1,<0.14.0',
'bio2bel>=0.2.0,<0.3.0',
'pybel>=0.14.0,<0.15.0',
'bio2bel>=0.3.0,<0.4.0',
'tqdm',
'sqlalchemy',
'click',
Expand Down
6 changes: 3 additions & 3 deletions src/bio2bel_hgnc/__init__.py
Expand Up @@ -6,14 +6,14 @@
from .manager import Manager # noqa: F401
from .utils import get_version # noqa: F401

__version__ = '0.2.5-dev'
__version__ = '0.3.0-dev'

__title__ = 'bio2bel_hgnc'
__description__ = "A package for converting HGNC to BEL"
__url__ = 'https://github.com/bio2bel/hgnc'

__author__ = 'Charles Tapley Hoyt'
__email__ = 'charles.hoyt@scai.fraunhofer.de'
__email__ = 'cthoyt@gmail.com'

__license__ = 'MIT License'
__copyright__ = 'Copyright (c) 2017-2018 Charles Tapley Hoyt'
__copyright__ = 'Copyright (c) 2017-2019 Charles Tapley Hoyt'
4 changes: 0 additions & 4 deletions src/bio2bel_hgnc/cli.py
Expand Up @@ -2,12 +2,8 @@

"""Command line interface for Bio2BEL HGNC."""

import logging

from .manager import Manager

log = logging.getLogger(__name__)

main = Manager.get_cli()

if __name__ == '__main__':
Expand Down
2 changes: 1 addition & 1 deletion src/bio2bel_hgnc/constants.py
Expand Up @@ -4,7 +4,7 @@

from bio2bel import get_data_dir

VERSION = '0.2.5-dev'
VERSION = '0.3.0-dev'

MODULE_NAME = 'hgnc'
#: The default directory where PyBEL files, including logs and the default cache, are stored. Created if it does not exist.
Expand Down
4 changes: 1 addition & 3 deletions src/bio2bel_hgnc/gfam_manager.py
Expand Up @@ -13,9 +13,7 @@
from pybel import BELGraph
from pybel.manager.models import Namespace, NamespaceEntry
from .model_utils import family_to_bel, gene_to_bel
from .models import (
Base, GeneFamily, gene_gene_family,
)
from .models import Base, GeneFamily, gene_gene_family
from .wrapper import BaseManager

__all__ = [
Expand Down
62 changes: 32 additions & 30 deletions src/bio2bel_hgnc/manager.py
Expand Up @@ -16,7 +16,7 @@
from bio2bel.manager.flask_manager import FlaskMixin
from bio2bel.manager.namespace_manager import BELNamespaceManagerMixin
from pybel import BELGraph
from pybel.constants import FUNCTION, GENE, IDENTIFIER, MIRNA, NAME, NAMESPACE, PROTEIN, RNA, VARIANTS
from pybel.constants import FUNCTION, MIRNA, NAME, NAMESPACE, PROTEIN, RNA, VARIANTS
from pybel.dsl import BaseEntity, CentralDogma, FUNC_TO_DSL, rna as rna_dsl
from pybel.manager.models import Namespace, NamespaceEntry
from .constants import ENCODINGS, ENTREZ, MODULE_NAME
Expand All @@ -28,14 +28,13 @@
)
from .wrapper import BaseManager

log = logging.getLogger(__name__)

__all__ = [
'Manager',
]

UNIPROT_RE = r'^([A-N,R-Z][0-9]([A-Z][A-Z, 0-9][A-Z, 0-9][0-9]){1,2})|([O,P,Q][0-9][A-Z, 0-9][A-Z, 0-9][A-Z, 0-9][0-9])(\.\d+)?$'
logger = logging.getLogger(__name__)

UNIPROT_RE = r'^([A-N,R-Z][0-9]([A-Z][A-Z, 0-9][A-Z, 0-9][0-9]){1,2})|([O,P,Q][0-9][A-Z, 0-9][A-Z, 0-9][A-Z, 0-9][0-9])(\.\d+)?$'
GENE_FAMILY_NAMESPACES = {'gfam', 'hgnc.family', 'hgnc.genefamily'}


Expand Down Expand Up @@ -181,7 +180,7 @@ def get_gene_by_mgi_id(self, mgi_id: str) -> Optional[HumanGene]:
human_genes = mouse_gene.hgncs

if len(human_genes) > 1:
log.warning('multiple human genes mapped to mgi_id:%s: %s', mgi_id, human_genes)
logger.warning('multiple human genes mapped to mgi_id:%s: %s', mgi_id, human_genes)
return

return human_genes[0]
Expand All @@ -200,7 +199,7 @@ def get_gene_by_rgd_id(self, rgd_id: str) -> Optional[HumanGene]:
human_genes = rat_gene.hgncs

if len(human_genes) > 1:
log.warning('multiple human genes mapped to rgd_id:%s: %s', rgd_id, human_genes)
logger.warning('multiple human genes mapped to rgd_id:%s: %s', rgd_id, human_genes)
return

return human_genes[0]
Expand Down Expand Up @@ -230,12 +229,15 @@ def get_node(self, node: BaseEntity) -> Optional[HumanGene]:
:param node: The node to look for
:raises: KeyError
"""
if NAMESPACE not in node:
if not isinstance(node, CentralDogma):
return

namespace = node.namespace
if namespace is None:
return

namespace = node[NAMESPACE]
identifier = node.get(IDENTIFIER)
name = node.get(NAME)
identifier = node.identifier
name = node.name

if namespace.lower() in {'hgnc'}:
return self._get_node_handle_hgnc(identifier, name)
Expand Down Expand Up @@ -325,26 +327,22 @@ def normalize_genes(self, graph: BELGraph, use_tqdm: bool = False) -> None:

def enrich_genes_with_equivalences(self, graph: BELGraph) -> None:
"""Enrich genes with their corresponding UniProt."""
self.add_namespace_to_graph(graph)

if 'uniprot' not in graph.namespace_url:
graph.namespace_pattern['uniprot'] = UNIPROT_RE

for node, human_gene in list(self.iter_genes(graph)):
func = node.function

if human_gene.entrez:
graph.add_equivalence(node, FUNC_TO_DSL[func](
entrez_node = FUNC_TO_DSL[func](
namespace=ENTREZ,
name=human_gene.symbol,
identifier=str(human_gene.entrez)
))
)
graph.add_equivalence(node, entrez_node)

if func in {PROTEIN, RNA, GENE}:
if func == PROTEIN:
for uniprot in human_gene.uniprots:
graph.add_equivalence(node, uniprot_to_bel(uniprot))

if func in {RNA, GENE}:
if func == RNA:
if human_gene.mirbase:
mirbase_rna_node = rna_dsl(
namespace='mirbase',
Expand Down Expand Up @@ -397,21 +395,25 @@ def enrich_families_with_genes(self, graph: BELGraph):
self.add_namespace_to_graph(graph)

for gene_family_node in list(graph):
if gene_family_node[FUNCTION] != GENE:
if not isinstance(gene_family_node, pybel.dsl.Gene):
continue

if gene_family_node.get(NAMESPACE).lower() not in GENE_FAMILY_NAMESPACES:
namespace = gene_family_node.namespace
if namespace is None or namespace.lower() not in GENE_FAMILY_NAMESPACES:
continue

if IDENTIFIER in gene_family_node:
gene_family_model = self.get_family_by_id(gene_family_node[IDENTIFIER])
elif NAME in gene_family_node:
gene_family_model = self.get_family_by_name(gene_family_node[NAME])
identifier = gene_family_node.identifier
name = gene_family_node.name

if identifier:
gene_family_model = self.get_family_by_id(identifier)
elif name:
gene_family_model = self.get_family_by_name(name)
else:
raise ValueError

if gene_family_model is None:
log.info('family not found: %s', gene_family_node)
logger.info('family not found: %s', gene_family_node)
continue

for human_gene in gene_family_model.hgncs:
Expand Down Expand Up @@ -523,12 +525,12 @@ def to_bel(self) -> BELGraph:
)

hgnc_namespace = self.upload_bel_namespace()
log.info('using default namespace: %s at %s', hgnc_namespace, hgnc_namespace.url)
logger.info('using default namespace: %s at %s', hgnc_namespace, hgnc_namespace.url)
graph.namespace_url[hgnc_namespace.keyword] = hgnc_namespace.url

gfam_manager = GfamManager(connection=self.connection)
gfam_namespace = gfam_manager.upload_bel_namespace()
log.info('using default namespace: %s at %s', gfam_namespace, gfam_namespace.url)
logger.info('using default namespace: %s at %s', gfam_namespace, gfam_namespace.url)
graph.namespace_url[gfam_namespace.keyword] = gfam_namespace.url

for human_gene in tqdm(self.list_human_genes(), total=self.count_human_genes(),
Expand Down Expand Up @@ -613,9 +615,9 @@ def _cli_add_populate(main: click.Group) -> click.Group: # noqa: D202
def populate(manager, reset, skip_hcop):
"""Populate the database."""
if reset:
log.info('Deleting the previous instance of the database')
logger.info('Deleting the previous instance of the database')
manager.drop_all()
log.info('Creating new models')
logger.info('Creating new models')
manager.create_all()

manager.populate(use_hcop=(not skip_hcop))
Expand Down
14 changes: 10 additions & 4 deletions src/bio2bel_hgnc/model_utils.py
Expand Up @@ -17,8 +17,11 @@
]


def gene_to_bel(human_gene: HumanGene, func: Optional[str] = None,
variants: Optional[List[Variant]] = None) -> CentralDogma:
def gene_to_bel(
human_gene: HumanGene,
func: Optional[str] = None,
variants: Optional[List[Variant]] = None,
) -> CentralDogma:
"""Convert a Gene to a PyBEL gene."""
dsl = FUNC_TO_DSL[func] if func else gene_dsl

Expand All @@ -34,8 +37,11 @@ def gene_to_bel(human_gene: HumanGene, func: Optional[str] = None,
return rv


def family_to_bel(family: GeneFamily, func: Optional[str] = None,
variants: Optional[List[Variant]] = None) -> CentralDogma:
def family_to_bel(
family: GeneFamily,
func: Optional[str] = None,
variants: Optional[List[Variant]] = None,
) -> CentralDogma:
"""Convert a Gene Family model to a PyBEL gene."""
dsl = FUNC_TO_DSL[func] if func else gene_dsl

Expand Down
25 changes: 14 additions & 11 deletions tests/test_enrich_metadata.py
Expand Up @@ -5,6 +5,7 @@
import logging
import unittest

import pybel.dsl
from bio2bel_hgnc import Manager
from bio2bel_hgnc.constants import ENTREZ, HGNC, HGNC_GENE_FAMILY
from bio2bel_hgnc.models import HumanGene
Expand All @@ -13,7 +14,7 @@
from pybel.dsl import gene, mirna, protein, rna
from tests.cases import TemporaryCacheMixin

log = logging.getLogger(__name__)
logger = logging.getLogger(__name__)

protein_hgnc_cd33 = protein(name='CD33', namespace=HGNC)
gene_hgnc_cd33 = protein_hgnc_cd33.get_rna().get_gene()
Expand Down Expand Up @@ -74,35 +75,37 @@ def test_get_rgd_node(self):
cd33_model = self.manager.get_node(cd33_rgd_name)
self.help_check_cd33_model(cd33_model)

def test_get_rgd_id_node(self):
"""Test getting a node by RGD identifier."""
cd33_model = self.manager.get_node(cd33_rgd_id)
self.help_check_cd33_model(cd33_model)

@unittest.skip('HGNC does not have MGI symbol information')
def test_get_mgi_node(self):
"""Test getting a node by MGI name."""
cd33_model = self.manager.get_node(cd33_mgi_name)
self.help_check_cd33_model(cd33_model)

def test_get_mgi_id_node(self):
"""Test getting a node by MGI identifier."""
cd33_model = self.manager.get_node(cd33_mgi_identifier)
self.help_check_cd33_model(cd33_model)

def test_get_entrez_node(self):
"""Test getting a node by Entrez Gene identifier."""
cd33_model = self.manager.get_node(cd33_entrez)
self.help_check_cd33_model(cd33_model)

def test_add_equivalency(self):
"""Test that CD33 identified by HGNC and Entrez can be equivalenced."""
gene_model = self.manager.get_gene_by_hgnc_symbol('CD33')
self.assertIsNotNone(gene_model)
self.assertEqual('CD33', gene_model.symbol)
self.assertEqual('1659', str(gene_model.identifier))
self.assertEqual('945', gene_model.entrez)

graph = BELGraph()
graph.add_node_from_data(protein_hgnc_cd33)

self.assertEqual(1, graph.number_of_nodes(), msg='wrong initial number of nodes')
self.assertEqual(0, graph.number_of_edges(), msg='wrong initial number of edges')

genes = list(self.manager.iter_genes(graph))
self.assertEqual(1, len(genes))
e, m = genes[0]
self.assertIsInstance(e, pybel.dsl.Protein)
self.assertIsInstance(m, HumanGene)

self.manager.enrich_genes_with_equivalences(graph)

# self.assertEqual(2, graph.number_of_nodes(), msg='nodes: {}'.format(list(graph)))
Expand Down
8 changes: 1 addition & 7 deletions tox.ini
Expand Up @@ -42,12 +42,6 @@ deps = check-manifest
commands = check-manifest
skip_install = true

[testenv:vulture]
deps = vulture
skip_install = true
commands = vulture src/bio2bel_hgnc/
description = Run the vulture tool to look for dead code.

[testenv:xenon]
deps = xenon
skip_install = true
Expand All @@ -66,7 +60,7 @@ deps =
pyroma
skip_install = true
commands = pyroma --min=10 .
description = Run the pyroma tool to check the project's package friendliness.
description = Run the pyroma tool to check the project package friendliness.

[testenv:docs]
changedir = docs
Expand Down

0 comments on commit 2cf8bc2

Please sign in to comment.