Skip to content

Commit

Permalink
Merge pull request #541 from biolink/pipeline-206-small-updates
Browse files Browse the repository at this point in the history
updates needed for new pipeline kernel, small bugfixes
  • Loading branch information
kltm committed Mar 17, 2021
2 parents dfa1491 + 331c88c commit 3162625
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 7 deletions.
5 changes: 1 addition & 4 deletions ontobio/io/assocparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from dataclasses import dataclass

from collections import namedtuple, defaultdict
from typing import Optional, List, Dict, Set
from typing import Optional, List, Dict, Set, Union

from ontobio import ontol
from ontobio import ecomap
Expand Down Expand Up @@ -897,9 +897,6 @@ def parse_with_dateutil(date: str, repot: Report, line: List) -> Optional[associ

return d

def parse_annotation_properties(properties_field: str):
    """
    Parse a pipe-separated annotation properties field into (key, value) pairs.

    Every non-empty ``|``-delimited entry is split on its first ``=``; anything
    after that first ``=`` (including further ``=`` characters) is the value.
    An entry with no ``=`` yields ``(entry, "")`` so that each element of the
    result is always a 2-tuple and can safely be unpacked as ``key, value``.
    Repeated keys are kept, in input order.

    :param properties_field: raw field text, e.g. ``"a=1|b=2|a=3"``
    :return: list of ``(key, value)`` string tuples; empty for an empty field
    """
    # str.partition always returns three parts, so a missing "=" produces an
    # empty value instead of a short tuple.
    entries = (prop.partition("=") for prop in properties_field.split("|") if prop)
    return [(key, value) for key, _sep, value in entries]

## we generate both qualifier and relation field
## Returns: (negated, relation, other_qualifiers)
Expand Down
4 changes: 4 additions & 0 deletions ontobio/io/gaference.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from ontobio.model import association
from ontobio.rdfgen import relations

import functools
import logging
import copy

Expand Down Expand Up @@ -84,6 +85,9 @@ def load_gaferencer_inferences_from_file(gaferencer_out) -> Dict[AnnotationKey,

return build_annotation_inferences(gaferencer_out_dict)

def load_gaferencer_inferences_from_files(gaferencer_out_list) -> Dict[AnnotationKey, InferenceValue]:
    """
    Load inferences from several gaferencer output files into one mapping.

    Each element of ``gaferencer_out_list`` is handed on its own to
    ``load_gaferencer_inferences_from_file`` and the resulting mappings are
    merged in list order. When the same key occurs in more than one file, the
    entry from the later file replaces the earlier one. An empty list gives an
    empty dict.
    """
    # The single-file loader takes exactly one argument, so it cannot be used
    # directly as a functools.reduce step (reduce calls step(accumulator, item)).
    merged = dict()  # type: Dict[AnnotationKey, InferenceValue]
    for gaferencer_out in gaferencer_out_list:
        loaded = load_gaferencer_inferences_from_file(gaferencer_out)
        # Nothing to merge for an empty result. This also tolerates None,
        # should the loader return it on failure (not visible from here).
        if loaded:
            merged.update(loaded)
    return merged


def build_annotation_inferences(gaferencer_out: List[Dict]) -> Dict[AnnotationKey, InferenceValue]:
inferences = dict() # type Dict[AnnotationKey, InferenceValue]
Expand Down
4 changes: 2 additions & 2 deletions ontobio/io/gpadparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def from_1_2(gpad_line: List[str], report=None, group="unknown", dataset="unknow
report.error(source_line, Report.EXTENSION_SYNTAX_ERROR, conjunctions.info, "extensions should be relation(curie)", taxon=str(taxon), rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)

properties_list = assocparser.parse_annotation_properties(gpad_line[11])
properties_list = association.parse_annotation_properties(gpad_line[11])


# print(properties_list)
Expand Down Expand Up @@ -456,7 +456,7 @@ def from_2_0(gpad_line: List[str], report=None, group="unknown", dataset="unknow
report.error(source_line, Report.EXTENSION_SYNTAX_ERROR, conjunctions.info, "extensions should be relation(curie)", taxon=str(taxon), rule=1)
return assocparser.ParseResult(source_line, [], True, report=report)

properties_list = assocparser.parse_annotation_properties(gpad_line[11])
properties_list = association.parse_annotation_properties(gpad_line[11])

a = association.GoAssociation(
source_line=source_line,
Expand Down
22 changes: 22 additions & 0 deletions ontobio/model/association.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
def ymd_str(date: Date, separator: str) -> str:
    """
    Render ``date`` as its year, month and day joined by ``separator``.

    The three fields are formatted exactly as they are stored; no zero-padding
    is applied here.
    """
    return f"{date.year}{separator}{date.month}{separator}{date.day}"


@dataclass
class Error:
info: str
Expand All @@ -38,6 +39,27 @@ class Error:
def is_error(self):
return True

def TwoTupleStr(items: List[str]) -> tuple:
    """
    Create a tuple of str that is guaranteed to be of length two from a list.

    If the list is larger, then only the first two elements will be used.
    If the list is smaller, then the empty string will be used for each
    missing position. Every element is passed through ``str``.

    The input sequence is never modified: padding is applied to a private copy.

    :param items: sequence of values to take the pair from
    :return: a tuple of exactly two strings
    """
    # Slicing copies, so the padding below cannot leak into the caller's list.
    pair = [str(item) for item in items[:2]]
    pair.extend([""] * (2 - len(pair)))
    return tuple(pair)


def parse_annotation_properties(properties_field: str) -> List[tuple]:
    """
    Turn a ``|``-separated properties field into a list of two-element tuples.

    Empty entries are dropped. Each remaining entry is split on its first
    ``=`` and the pieces are handed to ``TwoTupleStr`` to build the tuple.
    """
    non_empty_entries = filter(None, properties_field.split("|"))
    return [TwoTupleStr(entry.split("=", maxsplit=1)) for entry in non_empty_entries]

@dataclass(unsafe_hash=True)
class Curie:
namespace: str
Expand Down
3 changes: 2 additions & 1 deletion tests/test_gpad_parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from ontobio.io.gpadparser import GpadParser, to_association
from ontobio.io import assocparser
from ontobio.model import association
from ontobio.model.association import ConjunctiveSet, ExtensionUnit, Curie

import yaml
Expand Down Expand Up @@ -61,7 +62,7 @@ def test_parse_interacting_taxon():

def test_duplicate_key_annot_properties():
    """A key repeated in the properties field must keep every one of its values."""
    properties_str = "creation-date=2008-02-07|modification-date=2010-12-01|comment=v-KIND domain binding of Kndc1;MGI:1923734|contributor-id=http://orcid.org/0000-0003-2689-5511|contributor-id=http://orcid.org/0000-0003-3394-9805"
    # Parse once, via the model module; the extra call through assocparser was
    # a dead store whose result was overwritten before use.
    prop_list = association.parse_annotation_properties(properties_str)
    contributor_ids = [value for key, value in prop_list if key == "contributor-id"]
    assert set(contributor_ids) == {"http://orcid.org/0000-0003-2689-5511", "http://orcid.org/0000-0003-3394-9805"}

Expand Down

0 comments on commit 3162625

Please sign in to comment.