diff --git a/src/bio2bel_chebi/manager.py b/src/bio2bel_chebi/manager.py index 6ba8083..d94d338 100644 --- a/src/bio2bel_chebi/manager.py +++ b/src/bio2bel_chebi/manager.py @@ -5,8 +5,9 @@ import datetime import logging import time -from typing import List, Mapping, Optional +from typing import Iterable, List, Mapping, Optional, Tuple +from networkx import relabel_nodes import pandas as pd from tqdm import tqdm @@ -294,28 +295,41 @@ def populate(self, log.info('populated in %.2f seconds', time.time() - t) + def normalize_chemicals(self, graph: BELGraph) -> None: + mapping = { + node: chemical.to_bel() + for node, chemical in list(self.iter_chemicals(graph)) + } + relabel_nodes(graph, mapping, copy=False) + + def iter_chemicals(self, graph: BELGraph) -> Iterable[Tuple[BaseEntity, Chemical]]: + """Iterate over pairs of BEL nodes and ChEBI chemicals.""" + for node in graph: + chemical = self.get_chemical_from_data(node) + if chemical is not None: + yield node, chemical + def get_chemical_from_data(self, node: BaseEntity) -> Optional[Chemical]: namespace = node.get(NAMESPACE) - if namespace.lower() not in {'chebi', 'chebiid'}: + if not namespace or namespace.lower() not in {'chebi', 'chebiid'}: return identifier = node.get(IDENTIFIER) name = node.get(NAME) - if namespace.lower() == 'chebi': + if identifier is None and name is None: + raise ValueError + + if namespace.lower() == 'chebiid': + return self.get_chemical_by_chebi_id(name) + + elif namespace.lower() == 'chebi': if identifier is not None: return self.get_chemical_by_chebi_id(identifier) - - if name is not None: + else: # elif name is not None: return self.get_chemical_by_chebi_name(name) - else: - raise ValueError - - elif namespace.lower() == 'chebiid': - return self.get_chemical_by_chebi_id(name) - def enrich_chemical_hierarchy(self, graph: BELGraph) -> None: """Enrich the parents for all ChEBI chemicals in the graph.""" for _, data in graph.nodes(data=True): @@ -344,8 +358,7 @@ def to_bel(self) -> BELGraph: 
description=_chebi_description, ) - namespace = self.upload_bel_namespace() # Make sure the super id namespace is available - graph.namespace_url[namespace.keyword] = namespace.url + self.add_namespace_to_graph(graph) for relation in self._iterate_relations(): relation.add_to_graph(graph) diff --git a/src/bio2bel_chebi/models.py b/src/bio2bel_chebi/models.py index e20c8f9..531d8d6 100644 --- a/src/bio2bel_chebi/models.py +++ b/src/bio2bel_chebi/models.py @@ -2,14 +2,14 @@ """SQLAlchemy models for Bio2BEL ChEBI.""" -from typing import Mapping - from sqlalchemy import Column, Date, ForeignKey, Index, Integer, String, Text from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import backref, relationship +from typing import Mapping, Optional -from pybel.constants import NAME, PART_OF import pybel.dsl +from pybel import BELGraph +from pybel.constants import NAME __all__ = [ 'Base', @@ -61,10 +61,10 @@ def safe_name(self) -> str: """Either returns this molecule's name, or the parent name.""" return self.name or self.parent.name - def to_json(self, include_id: bool = False) -> Mapping: + def to_json(self, include_id: bool = False) -> Mapping[str, str]: """Export this chemical as dictionary. - :param bool include_id: Include the database identifier? + :param include_id: Include the database identifier? """ rv = { 'chebi_id': self.chebi_id, @@ -106,7 +106,7 @@ class Relation(Base): target_id = Column(Integer, ForeignKey('{}.id'.format(CHEMICAL_TABLE_NAME)), nullable=False) target = relationship('Chemical', foreign_keys=[target_id], backref=backref('in_edges', lazy='dynamic')) - def add_to_graph(self, graph): + def add_to_graph(self, graph: BELGraph) -> Optional[str]: """Add this relation to the graph. 
:param pybel.BELGraph graph: @@ -119,7 +119,7 @@ def add_to_graph(self, graph): return if self.type == 'has_part': - return graph.add_unqualified_edge(target, source, PART_OF) + return graph.add_part_of(target, source) if self.type == 'is_a': return graph.add_is_a(target, source)