Skip to content

Commit

Permalink
Fixes for bel validation
Browse files: browse the repository at this point in the history
  • Loading branch information
wshayes committed Sep 18, 2020
1 parent 83aebc8 commit 5cdf85a
Show file tree
Hide file tree
Showing 14 changed files with 266 additions and 217 deletions.
2 changes: 1 addition & 1 deletion src/bel/api/endpoints/orthology.py
Expand Up @@ -21,6 +21,6 @@ def get_orthologs(gene_id: str, species: str = ""):

species = [item for item in species.split(",") if item]

orthologs = bel.terms.orthologs(gene_id, species)
orthologs = bel.terms.orthologs.get_orthologs(gene_id, species)

return {"orthologs": orthologs}
131 changes: 85 additions & 46 deletions src/bel/lang/ast.py
Expand Up @@ -132,12 +132,12 @@ def update(self, name: str):
self.function_type = ""

def is_primary(self):
if self.function_type == "primary":
if self.function_type == "Primary":
return True
return False

def is_modifier(self):
if self.function_type == "modifier":
if self.function_type == "Modifier":
return True
return False

Expand Down Expand Up @@ -195,7 +195,7 @@ def decanonicalize(

def orthologize(self, species_key):
"""Orthologize Assertion
Check if fully orthologizable() before orthologizing, otherwise
you may get a partially orthologized Assertion
"""
Expand All @@ -211,7 +211,7 @@ def orthologize(self, species_key):

def orthologizable(self, species_key: Key) -> Optional[bool]:
"""Is this Assertion fully orthologizable?
Is it possible to orthologize every gene/protein/RNA NSArg to the target species?
"""

Expand All @@ -233,12 +233,19 @@ def validate(self, errors: List[ValidationError] = None):
if errors is None:
errors = []

# Process AST top-level args or Function args
# Collect term info for NSArgs before validation
if hasattr(self, "args"):
for arg in self.args:
if arg and arg.type == "NSArg":
arg.entity.add_term()

# Validate function (or top-level args)
errors.extend(validate_function(self))

# Recursively validate args that are functions
if hasattr(self, "args"):
for arg in self.args:
if arg and arg.type in ["Function"]:
if arg and arg.type == "Function":
arg.validate(errors=errors)

return errors
Expand Down Expand Up @@ -365,10 +372,6 @@ def __init__(self, entity: BelEntity, parent=None, span: NsArgSpan = None):
self.entity = entity

self.type = "NSArg"
self.value_types = []

def add_value_types(self, value_types):
self.value_types = value_types

def canonicalize(
self,
Expand Down Expand Up @@ -425,17 +428,13 @@ def __init__(self, value, span: Span = None, parent=None):
Arg.__init__(self, parent, span)
self.value = value
self.type = "StrArg"
self.value_types = []

def update(self, value: str):
"""Update to new BEL Entity"""

self.value = value
self.span = None

def add_value_types(self, value_types):
self.value_types = value_types

def to_string(self, fmt: str = "medium") -> str:
"""Convert AST object to string
Expand All @@ -461,7 +460,7 @@ def __str__(self):

class ParseInfo:
"""BEL Assertion Parse Information
Matching quotes need to be gathered first
"""

Expand Down Expand Up @@ -550,7 +549,7 @@ def __init__(
self.errors.append(ValidationError(type="Assertion", severity="Error", msg=msg))
elif assertion.object and (not assertion.subject or not assertion.relation):
msg = "Missing Assertion Subject or Relation"
self.errors.append(ValidationError(type="Assertion", severity="Error", msg=msg))
self.errors.append(ValidationError(type="Assertion", severity="Error", msg=msg))

if not self.errors and self.assertion is not None and not self.args:
self.parse() # parse assertion into BEL AST
Expand Down Expand Up @@ -691,8 +690,8 @@ def canonicalize(
decanonical_targets: Mapping[str, List[str]] = settings.BEL_DECANONICALIZE,
):
"""Canonicalize BEL Assertion
Must set both targets if not using defaults as the underlying normalization handles
Must set both targets if not using defaults as the underlying normalization handles
both canonical and decanonical forms in the same query
"""

Expand All @@ -719,8 +718,8 @@ def decanonicalize(
decanonical_targets: Mapping[str, List[str]] = settings.BEL_DECANONICALIZE,
):
"""Decanonicalize BEL Assertion
Must set both targets if not using defaults as the underlying normalization handles
Must set both targets if not using defaults as the underlying normalization handles
both canonical and decanonical forms in the same query
"""

Expand Down Expand Up @@ -756,8 +755,8 @@ def orthologize(self, species_key: Key):

def orthologizable(self, species_key: Key):
"""Is this Assertion fully orthologizable?
This method will detect if the orthologization will result
This method will detect if the orthologization will result
in a partially orthologized Assertion.
"""

Expand Down Expand Up @@ -896,7 +895,12 @@ def match_signatures(args, signatures):
"""Which signature to use"""

for signature in signatures:
if args[0].type == signature["arguments"][0]["type"]:
if (
args[0].type == "Function"
and args[0].function_type == signature["arguments"][0]["type"]
):
return signature
elif args[0].type == signature["arguments"][0]["type"]:
return signature


Expand Down Expand Up @@ -988,26 +992,48 @@ def validate_function(fn: Function, errors: List[ValidationError] = None) -> Lis
)
)

elif fn.args[position] and fn.args[position].type not in argument["type"]:
# elif (
# fn.args[position]
# and fn.args[position].type == "Function"
# and fn.args[position].function_type not in argument["type"]
# ):
# errors.append(
# ValidationError(
# type="Assertion",
# severity="Error",
# msg=f"Incorrect function type '{fn.args[position].type}' at position: {position} for function: {fn.name}, should be one of {argument['type']}",
# visual_pairs=[(fn.args[position].span.start, fn.args[position].span.end)],
# index=fn.args[position].span.start,
# )
# )

# Function name mis-match
elif (
fn.args[position]
and fn.args[position].type == "Function"
and not (fn.args[position].name in argument["values"])
):
errors.append(
ValidationError(
type="Assertion",
severity="Error",
msg=f"Incorrect argument type '{fn.args[position].type}' at position: {position} for function: {fn.name}, should be one of {argument['type']}",
msg=f"Incorrect function for argument '{fn.args[position].name}' at position: {position} for function: {fn.name}",
visual_pairs=[(fn.args[position].span.start, fn.args[position].span.end)],
index=fn.args[position].span.start,
)
)

# Function name mis-match
elif fn.args[position].type == "Function" and not (
fn.args[position].name in argument["values"]
# Wrong [non-function] argument type
elif (
fn.args[position]
and fn.args[position].type != "Function"
and fn.args[position].type not in argument["type"]
):
errors.append(
ValidationError(
type="Assertion",
severity="Error",
msg=f"Incorrect function for argument '{fn.args[position].name}' at position: {position} for function: {fn.name}",
msg=f"Incorrect argument type '{fn.args[position].type}' at position: {position} for function: {fn.name}, should be one of {argument['type']}",
visual_pairs=[(fn.args[position].span.start, fn.args[position].span.end)],
index=fn.args[position].span.start,
)
Expand Down Expand Up @@ -1060,28 +1086,41 @@ def validate_function(fn: Function, errors: List[ValidationError] = None) -> Lis

# Third pass - non-positional (primary/modifier) args that don't show up in opt_args or mult_args
opt_and_mult_args = opt_args + signature["mult_args"]
problem_args = set()
for fn_arg in fn.args[post_positional:]:
if fn_arg.type == "Function" and fn_arg.name not in opt_and_mult_args:
problem_args.add(fn_arg.name)
errors.append(
ValidationError(
type="Assertion",
severity="Error",
msg=f"Function {fn_arg.name} is not allowed as an optional or multiple argument",
visual_pairs=[(fn.span.start, fn.span.end)],
index=fn.span.start,
)
)

elif fn_arg.type == "NSArg" and fn_arg.entity.term is None:
errors.append(
ValidationError(
type="Assertion",
severity="Warning",
msg=f"Unknown namespace value {fn_arg.entity.nsval.key_label} - cannot determine if this matches function signature",
visual_pairs=[(fn.span.start, fn.span.end)],
index=fn.span.start,
)
)
# This handles complex(NSArg, p(X)) validation
elif fn_arg.type == "NSArg" and not intersect(
fn_arg.entity.entity_types, opt_and_mult_args
):
problem_args.add(fn_arg.entity)

problem_args = list(problem_args)
if len(problem_args) > 0:
errors.append(
ValidationError(
type="Assertion",
severity="Error",
msg=f"Not allowed as optional or multiple arguments {problem_args}",
visual_pairs=[(fn.span.start, fn.span.end)],
index=fn.span.start,
errors.append(
ValidationError(
type="Assertion",
severity="Error",
msg=f"Namespace value: {fn_arg.entity.nsval} with entity_types {fn_arg.entity.entity_types} are not allowed for function {fn_arg.parent.name} as an optional or multiple argument",
visual_pairs=[(fn.span.start, fn.span.end)],
index=fn.span.start,
)
)
)

# Fourth pass - positional NSArg entity_types checks
for argument in signature["arguments"]:
Expand All @@ -1100,7 +1139,7 @@ def validate_function(fn: Function, errors: List[ValidationError] = None) -> Lis
ValidationError(
type="Assertion",
severity="Warning",
msg=f"Unknown namespace '{fn.args[position].entity.nsval.namespace}' for the {fn.name} function at position {fn.args[position].span.namespace.start}",
msg=f"Unknown namespace value '{fn.args[position].entity.nsval.key_label}' for the {fn.name} function at position {fn.args[position].span.namespace.start}",
visual_pairs=[
(
fn.args[position].span.namespace.start,
Expand Down Expand Up @@ -1157,7 +1196,7 @@ def validate_function(fn: Function, errors: List[ValidationError] = None) -> Lis

# Modifier function with wrong parent function
if (
fn.function_signature["func_type"] == "modifier"
fn.function_signature["func_type"] == "Modifier"
and fn.parent
and fn.parent.name not in fn.function_signature["primary_function"]
):
Expand Down Expand Up @@ -1243,7 +1282,7 @@ def sort_function_args(fn: Function):
# TODO use https://github.com/biocommons/hgvs to sort by variant position
fn_arg.sort_tuple = (modifier_func_index, fn_arg.name, str(fn_arg))

elif fn_arg.function_type == "modifier":
elif fn_arg.function_type == "Modifier":
fn_arg.sort_tuple = (modifier_func_index, fn_arg.name, str(fn_arg))

elif fn_arg.type == "Function":
Expand Down
22 changes: 13 additions & 9 deletions src/bel/lang/parse.py
Expand Up @@ -85,12 +85,12 @@ def intersect(pos: int, spans: List[Optional[Span]]) -> bool:
return False


def ordered_pairs(a: List[int], b: List[int]) -> List[Union[int, None]]:
"""Return ordered pairs such that every a, b pair has a < b"""
def ordered_pairs(left: List[int], right: List[int]) -> List[Union[int, None]]:
"""Return ordered pairs such that every left, right pair has left < right"""

alt = {"a": "b", "b": "a"}
alt = {"left": "right", "right": "left"}

pairs = [("a", item) for item in a] + [("b", item) for item in b]
pairs = [("left", item) for item in left] + [("right", item) for item in right]
pairs.sort(key=lambda x: x[1])

new_pairs = []
Expand Down Expand Up @@ -141,8 +141,10 @@ def find_matching_quotes(
type="Assertion",
severity="Error",
msg=f"Missing left quote between right quotes at positions {matched_quotes[idx-1].end} and {pair.end}",
visual=html_wrap_span(assertion_str, [(matched_quotes[idx-1].end, pair.end + 1)]),
index=matched_quotes[idx-1].end,
visual=html_wrap_span(
assertion_str, [(matched_quotes[idx - 1].end, pair.end + 1)]
),
index=matched_quotes[idx - 1].end,
)
)
elif pair.end is None and idx == len(matched_quotes):
Expand All @@ -161,7 +163,9 @@ def find_matching_quotes(
type="Assertion",
severity="Error",
msg=f"Missing right quote between left quotes at positions {pair.start} and {matched_quotes[idx+1].start}",
visual=html_wrap_span(assertion_str, [(pair.start, matched_quotes[idx+1].start)]),
visual=html_wrap_span(
assertion_str, [(pair.start, matched_quotes[idx + 1].start)]
),
index=pair.start,
)
)
Expand Down Expand Up @@ -191,7 +195,7 @@ def find_commas(
def find_matching_parens(
assertion_str, matched_quotes, errors: List[ValidationError]
) -> Tuple[List[Pair], List[ValidationError]]:
""" Find and return the location of the matching parentheses pairs in s.
"""Find and return the location of the matching parentheses pairs in s.
Given a string, s, return a dictionary of start: end pairs giving the
indexes of the matching parentheses in s. Suitable exceptions are
Expand Down Expand Up @@ -362,7 +366,7 @@ def find_functions(

def find_nsargs(assertion_str: str) -> List[Optional[NsArgSpan]]:
"""Namespace argument parsing
Namespace IDs and Labels are NOT allowed to have internal double quotes.
IDs or Labels with commas or end parenthesis in them must be quoted.
Expand Down
21 changes: 11 additions & 10 deletions src/bel/nanopub/pubmed.py
Expand Up @@ -127,8 +127,7 @@ def get_pubtator(pmid):


def process_pub_date(year, mon, day, medline_date):
"""Create pub_date from what Pubmed provides in Journal PubDate entry
"""
"""Create pub_date from what Pubmed provides in Journal PubDate entry"""

if medline_date:
year = "0000"
Expand Down Expand Up @@ -258,16 +257,18 @@ def get_pubmed_url(pmid):
pubmed_url = f"{PUBMED_TMPL}{str(pmid)}"

r = http_client.get(pubmed_url)
content = r.content
root = etree.fromstring(content)

except Exception as e:
status_code = None
if r:
status_code = r.status_code
logger.info(f"Status {r.status_code} URL: {pubmed_url}")

if r.status_code == 200:
content = r.content
root = etree.fromstring(content)
else:
logger.warning(f"Could not download pubmed url: {pubmed_url}")

except Exception as e:
logger.warning(
f"Bad Pubmed request, status: {status_code} error: {str(e)}",
f"Bad Pubmed request, error: {str(e)}",
url=f'{PUBMED_TMPL.replace("PMID", pmid)}',
)

Expand Down Expand Up @@ -340,7 +341,7 @@ def get_normalized_terms_for_annotations(term_keys):

def add_annotations(pubmed):
"""Add nanopub annotations to pubmed doc
Enhance MESH terms etc as full-fledged nanopub annotations for use by the BEL Nanopub editor
"""

Expand Down

0 comments on commit 5cdf85a

Please sign in to comment.