Skip to content

Commit

Permalink
Add normalization function
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Feb 24, 2019
1 parent 126db70 commit bab61ca
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 20 deletions.
39 changes: 26 additions & 13 deletions src/bio2bel_chebi/manager.py
Expand Up @@ -5,8 +5,9 @@
import datetime
import logging
import time
from typing import List, Mapping, Optional
from typing import Iterable, List, Mapping, Optional, Tuple

from networkx import relabel_nodes
import pandas as pd
from tqdm import tqdm

Expand Down Expand Up @@ -294,28 +295,41 @@ def populate(self,

log.info('populated in %.2f seconds', time.time() - t)

def normalize_chemicals(self, graph: BELGraph) -> None:
    """Relabel the chemical nodes of the graph, in place, with their normalized BEL nodes.

    Every node for which :meth:`iter_chemicals` yields a chemical is mapped to the
    result of ``chemical.to_bel()``. Nodes that are not yielded are not part of the
    mapping handed to :func:`networkx.relabel_nodes`.

    :param graph: The BEL graph to modify in place
    """
    # The whole mapping is built before relabeling starts, so the graph is never
    # mutated while it is still being iterated over.
    mapping = {
        node: chemical.to_bel()
        for node, chemical in self.iter_chemicals(graph)
    }
    relabel_nodes(graph, mapping, copy=False)

def iter_chemicals(self, graph: BELGraph) -> Iterable[Tuple[BaseEntity, Chemical]]:
    """Iterate over pairs of BEL nodes and their corresponding ChEBI chemicals.

    Nodes for which :meth:`get_chemical_from_data` returns ``None`` are skipped.

    :param graph: The BEL graph whose nodes are checked
    """
    for node in graph:
        chemical = self.get_chemical_from_data(node)
        if chemical is not None:
            yield node, chemical

def get_chemical_from_data(self, node: BaseEntity) -> Optional[Chemical]:
    """Look up the chemical referenced by a BEL node.

    :param node: A BEL node; its namespace, identifier, and name are read via ``node.get``
    :return: The result of the matching lookup, or ``None`` if the node has no namespace
     or its namespace is neither ``chebi`` nor ``chebiid`` (compared case-insensitively)
    :raises ValueError: If the node uses one of those namespaces but has neither an
     identifier nor a name
    """
    namespace = node.get(NAMESPACE)

    # A node without a namespace cannot be looked up (and has no ``.lower()``)
    if not namespace:
        return None

    namespace = namespace.lower()  # normalize once instead of on every comparison
    if namespace not in {'chebi', 'chebiid'}:
        return None

    identifier = node.get(IDENTIFIER)
    name = node.get(NAME)

    if identifier is None and name is None:
        raise ValueError('node has neither an identifier nor a name: {!r}'.format(node))

    if namespace == 'chebiid':
        # For this namespace the node's name is what gets passed to the identifier lookup
        return self.get_chemical_by_chebi_id(name)

    # Remaining case is the 'chebi' namespace: prefer the identifier, fall back to the name
    if identifier is not None:
        return self.get_chemical_by_chebi_id(identifier)

    return self.get_chemical_by_chebi_name(name)

def enrich_chemical_hierarchy(self, graph: BELGraph) -> None:
"""Enrich the parents for all ChEBI chemicals in the graph."""
for _, data in graph.nodes(data=True):
Expand Down Expand Up @@ -344,8 +358,7 @@ def to_bel(self) -> BELGraph:
description=_chebi_description,
)

namespace = self.upload_bel_namespace() # Make sure the super id namespace is available
graph.namespace_url[namespace.keyword] = namespace.url
self.add_namespace_to_graph(graph)

for relation in self._iterate_relations():
relation.add_to_graph(graph)
Expand Down
14 changes: 7 additions & 7 deletions src/bio2bel_chebi/models.py
Expand Up @@ -2,14 +2,14 @@

"""SQLAlchemy models for Bio2BEL ChEBI."""

from typing import Mapping

from sqlalchemy import Column, Date, ForeignKey, Index, Integer, String, Text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import backref, relationship
from typing import Mapping, Optional

from pybel.constants import NAME, PART_OF
import pybel.dsl
from pybel import BELGraph
from pybel.constants import NAME

__all__ = [
'Base',
Expand Down Expand Up @@ -61,10 +61,10 @@ def safe_name(self) -> str:
"""Either returns this molecule's name, or the parent name."""
return self.name or self.parent.name

def to_json(self, include_id: bool = False) -> Mapping:
def to_json(self, include_id: bool = False) -> Mapping[str, str]:
"""Export this chemical as dictionary.
:param bool include_id: Include the database identifier?
:param include_id: Include the database identifier?
"""
rv = {
'chebi_id': self.chebi_id,
Expand Down Expand Up @@ -106,7 +106,7 @@ class Relation(Base):
target_id = Column(Integer, ForeignKey('{}.id'.format(CHEMICAL_TABLE_NAME)), nullable=False)
target = relationship('Chemical', foreign_keys=[target_id], backref=backref('in_edges', lazy='dynamic'))

def add_to_graph(self, graph):
def add_to_graph(self, graph: BELGraph) -> Optional[str]:
"""Add this relation to the graph.
:param pybel.BELGraph graph:
Expand All @@ -119,7 +119,7 @@ def add_to_graph(self, graph):
return

if self.type == 'has_part':
return graph.add_unqualified_edge(target, source, PART_OF)
return graph.add_part_of(target, source)

if self.type == 'is_a':
return graph.add_is_a(target, source)
Expand Down

0 comments on commit bab61ca

Please sign in to comment.