Skip to content

Commit

Permalink
Add relation completion to BEL completion
Browse files Browse the repository at this point in the history
Can now handle full BEL statement including nested objects with addition of BEL relation completion capability.
  • Loading branch information
wshayes committed Jan 21, 2018
1 parent d84d371 commit 36eb166
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 21 deletions.
77 changes: 65 additions & 12 deletions bel/lang/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,30 @@ def cursor(belstr: str, ast: AST, cursor_loc: int, result: Mapping[str, Any] = N
# log.debug(f'SubAST: {json.dumps(ast, indent=4)}')

# Recurse down through subject, object, nested to functions
if 'span' not in ast and isinstance(ast, dict):
log.debug(f'Cursor keys {ast.keys()}')

if 'relation' in ast and in_span(cursor_loc, ast['relation']['span']):
log.debug('In relation')

completion_text = belstr[ast['relation']['span'][0]:cursor_loc + 1]
return {
'type': 'Relation',
'replace_span': ast['relation']['span'],
'completion_text': completion_text,
}

# Handle subject, object and nested keys in tree
elif 'span' not in ast and isinstance(ast, dict):
for key in ast:
log.debug(f'Recursing Keys {key}')
result = cursor(belstr, ast[key], cursor_loc, result=result)
if result:
return result
if key in ['subject', 'object', 'nested']:
log.debug(f'Recursing Keys {key}')
result = cursor(belstr, ast[key], cursor_loc, result=result)
if result:
return result

# Process span matches (functions, nsargs, strargs)
# Matches Functions, NSArgs and StrArgs/StrArgNSArg
if 'span' in ast and in_span(cursor_loc, ast['span']):
log.debug('Inside subject/object subAST')
if 'function' in ast:
name_span = ast['function']['name_span']
if in_span(cursor_loc, name_span):
Expand Down Expand Up @@ -114,7 +129,7 @@ def cursor(belstr: str, ast: AST, cursor_loc: int, result: Mapping[str, Any] = N
else:
result['completion_text'] = belstr[arg['nsarg']['ns_span'][0]:cursor_loc + 1]

log.debug('Found replace_span in args: NSArg type')
log.debug(f'Found replace_span in args: NSArg {result}')
return result
elif arg['type'] == 'StrArg': # in case this is a default namespace StrArg
if arg['span'][0] == arg['span'][1]: # handle case like p() cursor=2
Expand All @@ -128,8 +143,7 @@ def cursor(belstr: str, ast: AST, cursor_loc: int, result: Mapping[str, Any] = N
'parent_function': ast['function']['name'],
'completion_text': completion_text,
}

return result
return result # needed to pass result back up recursive stack


def nsarg_completions(completion_text: str, entity_types: list, bel_spec: BELSpec, namespace: str, species_id: str, bel_fmt: str, size: int):
Expand Down Expand Up @@ -189,6 +203,37 @@ def nsarg_completions(completion_text: str, entity_types: list, bel_spec: BELSpe
return replace_list[:size]


def relation_completions(completion_text: str, bel_spec: BELSpec, bel_fmt: str, size: int) -> list:
    """Filter BEL relations by prefix

    Args:
        completion_text: text typed so far for the relation; compared
            literally (not as a regular expression) against the start of
            each relation name
        bel_spec: BEL specification; relations are read from
            bel_spec['relations']['list_short'] or
            bel_spec['relations']['list_long']
        bel_fmt: 'short' selects the short relation list, any other value
            selects the long relation list
        size: maximum number of completions to return

    Returns:
        list: dicts with 'replacement', 'label' and 'highlight' keys, one per
            relation that starts with completion_text, at most `size` entries
    """

    if bel_fmt == 'short':
        relation_list = bel_spec['relations']['list_short']
    else:
        relation_list = bel_spec['relations']['list_long']

    # Literal prefix test: relation symbols such as '-|' and '=|' contain
    # regex metacharacters, so a regex match on the raw text would match
    # every relation (empty alternation branch) or raise on input like '('.
    matches = [r for r in relation_list if r.startswith(completion_text)]

    replace_list = []
    for match in matches:
        highlight = match.replace(completion_text, f'<em>{completion_text}</em>')
        replace_list.append({'replacement': match, 'label': match, 'highlight': highlight})

    return replace_list[:size]


def function_completions(completion_text: str, bel_spec: BELSpec, function_list: list, bel_fmt: str, size: int) -> list:
"""Filter BEL functions by prefix
Expand Down Expand Up @@ -358,6 +403,8 @@ def get_completions(belstr: str, cursor_loc: int, bel_spec: BELSpec, bel_comp: s

ast, errors = pparse.get_ast_dict(belstr)

# print('AST:\n', json.dumps(ast, indent=4))

# TODO - update collect_spans to use AST
spans = []
# spans = pparse.collect_span(ast)
Expand All @@ -367,6 +414,8 @@ def get_completions(belstr: str, cursor_loc: int, bel_spec: BELSpec, bel_comp: s

log.debug(f'Cursor location BELstr: {belstr} Cursor idx: {cursor_loc}')
cursor_results = cursor(belstr, ast, cursor_loc)
log.debug(f'Cursor results: {cursor_results}')

if not cursor_results:
log.debug('Cursor results is empty')
return ([], [], [], [])
Expand All @@ -384,9 +433,11 @@ def get_completions(belstr: str, cursor_loc: int, bel_spec: BELSpec, bel_comp: s
arg_idx = cursor_results.get('arg_idx')

replace_list = arg_completions(completion_text, parent_function, args, arg_idx, bel_spec, bel_fmt, species_id, namespace, size)
else:
elif cursor_results['type'] == 'Function':
function_list = None
replace_list = function_completions(completion_text, bel_spec, function_list, bel_fmt, size)
elif cursor_results['type'] == 'Relation':
replace_list = relation_completions(completion_text, bel_spec, bel_fmt, size)

completions.extend(add_completions(replace_list, belstr, replace_span, completion_text))

Expand Down Expand Up @@ -441,8 +492,10 @@ def bel_completion(belstr: str, cursor_loc: int = -1, bel_version: str = default
elif cursor_loc >= belstrlen:
cursor_loc = belstrlen - 1

with timy.Timer() as timer:
(completion_text, completions, function_help, spans) = get_completions(belstr, cursor_loc, bel_spec, bel_comp, bel_fmt, species_id, size)
# with timy.Timer() as timer:
# (completion_text, completions, function_help, spans) = get_completions(belstr, cursor_loc, bel_spec, bel_comp, bel_fmt, species_id, size)

(completion_text, completions, function_help, spans) = get_completions(belstr, cursor_loc, bel_spec, bel_comp, bel_fmt, species_id, size)

return {'completion_text': completion_text, 'completions': completions, 'function_help': function_help, 'entity_spans': spans}

Expand Down
36 changes: 29 additions & 7 deletions bel/lang/partialparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@
start_arg_chars = ['(', ',']
end_arg_chars = [')', ',']

relations_pattern = re.compile('\)\s+([a-zA-Z=->\|:]+)\s+([\w(]+)')
relations_pattern_middle = re.compile('\)\s+([a-zA-Z\=\-\>\|\:]+)\s+[\w\(]+')
relations_pattern_end = re.compile('\)\s+([a-zA-Z\=\-\>\|\:]+)\s*$')

Errors = List[Tuple[str, str, Optional[Tuple[int, int]]]] # (<"Error"|"Warning">, "message", (start_span, end_span))
Parsed = MutableMapping[str, Any]
Expand Down Expand Up @@ -176,8 +177,8 @@ def parse_chars(bels: list, errors: Errors) -> Tuple[CharLocs, Errors]:
parens[pstack.pop()] = (-1, 'top')

while len(nested_pstack):
errors.append(('ERROR', f'Missing right parenthesis for nested object left parenthesis at location {pstack[-1]}', (pstack[-1], pstack[-1])))
parens[pstack.pop()] = (-1, 'top')
errors.append(('ERROR', f'Missing right parenthesis for nested object left parenthesis at location {nested_pstack[-1]}', (nested_pstack[-1], nested_pstack[-1])))
nested_parens[nested_pstack.pop()] = (-1, 'top')

if len(qstack):
missing_quote = qstack.pop()
Expand Down Expand Up @@ -356,8 +357,25 @@ def parse_relations(belstr: str, char_locs: CharLocs, parsed: Parsed, errors: Er
quotes = char_locs['quotes']
quoted_range = set([i for start, end in quotes.items() for i in range(start, end)])

for match in relations_pattern.finditer(belstr):
for match in relations_pattern_middle.finditer(belstr):
(start, end) = match.span(1)
log.debug(f'Relation-middle {match}')
end = end - 1 # adjust end to match actual end character index
if start != end:
test_range = set(range(start, end))
else:
test_range = set(start)

# Skip if relation overlaps with quoted string
if test_range.intersection(quoted_range):
continue

span_key = (start, end)
parsed[span_key] = {'type': 'Relation', 'name': match.group(1), 'span': (start, end)}

for match in relations_pattern_end.finditer(belstr):
(start, end) = match.span(1)
log.debug(f'Relation-end {match}')
end = end - 1 # adjust end to match actual end character index
if start != end:
test_range = set(range(start, end))
Expand All @@ -379,6 +397,8 @@ def parse_nested(bels: list, char_locs: CharLocs, parsed: Parsed, errors: Errors

for sp in char_locs['nested_parens']: # sp = start parenthesis, ep = end parenthesis
ep, level = char_locs['nested_parens'][sp]
if ep == -1:
ep = len(bels) + 1
parsed[(sp, ep)] = {'type': 'Nested', 'span': (sp, ep)}

return parsed, errors
Expand Down Expand Up @@ -467,8 +487,7 @@ def print_spans(spans, max_idx: int) -> None:
def parsed_function_to_ast(parsed: Parsed, parsed_key):
"""Create AST for top-level functions
"""
# dump_json(parsed)
# quit()

sub = parsed[parsed_key]

subtree = {
Expand Down Expand Up @@ -578,6 +597,9 @@ def parsed_to_ast(parsed: Parsed, errors: Errors, component_type: str = ''):
are parsing the subject or object field input
"""

# dump_json(parsed)
# quit()

ast = {}
sorted_keys = sorted(parsed.keys())

Expand Down Expand Up @@ -612,7 +634,7 @@ def parsed_to_ast(parsed: Parsed, errors: Errors, component_type: str = ''):
ast['nested']['relation'] = {
'name': parsed[nested_key]['name'],
'type': 'Relation',
'span': key,
'span': parsed[nested_key]['span'],
}

return ast, errors
Expand Down
67 changes: 66 additions & 1 deletion tests/lang/test_completion.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import bel.lang.completion
import pytest
import json

import bel.lang.completion
import bel.utils

from bel.Config import config


def test_completion_fn_name_start_long():

Expand Down Expand Up @@ -59,6 +64,9 @@ def test_completion_arg_fn_2():

def test_completion_arg_ns_prefix():

if bel.utils.get_url(f"{config['bel_api']['servers']['api_url']}/simple_status").status_code != 200:
pytest.xfail('BEL.bio API Test environment is not setup')

completions = bel.lang.completion.bel_completion('complex(p(HGNC:EGFR))', cursor_loc=18, bel_fmt='medium')
print('Completions:\n', json.dumps(completions, indent=4))
assert completions["completion_text"] == "EGFR"
Expand All @@ -70,6 +78,9 @@ def test_completion_arg_ns_prefix():

def test_completion_arg_ns_val():

if bel.utils.get_url(f"{config['bel_api']['servers']['api_url']}/simple_status").status_code != 200:
pytest.xfail('BEL.bio API Test environment is not setup')

completions = bel.lang.completion.bel_completion('complex(p(HGNC:EGFR))', cursor_loc=12, bel_fmt='medium')
print('Completions:\n', json.dumps(completions, indent=4))
assert completions["completion_text"] == "HGN"
Expand Down Expand Up @@ -100,3 +111,57 @@ def test_completion_arg_StrArgNSArg_2():
assert completions['entity_spans'] == []
assert completions['function_help'] != []


def test_completion_relation_end():
    """A relation prefix typed at the end of the statement completes to the medium-format relation name."""

    belstr = 'complex(p(HGNC:EGFR, pmod(pa))) inc'
    result = bel.lang.completion.bel_completion(belstr, bel_fmt='medium')
    print('Completions:\n', json.dumps(result, indent=4))

    assert result["completion_text"] == "inc"

    # Only the first suggestion is pinned by this test
    top = result['completions'][0]
    assert top['replacement'] == "complex(p(HGNC:EGFR, pmod(pa))) increases"
    assert top['cursor_loc'] == 41

    assert result['entity_spans'] == []
    assert result['function_help'] == []


def test_completion_relation_end_short():
    """A complete short-format relation at the end of the statement is offered back unchanged."""

    belstr = 'complex(p(HGNC:EGFR, pmod(pa))) ->'
    result = bel.lang.completion.bel_completion(belstr, bel_fmt='short')
    print('Completions:\n', json.dumps(result, indent=4))

    assert result["completion_text"] == "->"

    # The first suggestion reproduces the input statement
    top = result['completions'][0]
    assert top['replacement'] == "complex(p(HGNC:EGFR, pmod(pa))) ->"
    assert top['cursor_loc'] == 34

    assert result['entity_spans'] == []
    assert result['function_help'] == []


def test_completion_relation_end_short_cursorloc():
    """With the cursor on the first relation character, every short relation sharing that prefix is offered."""

    belstr = 'complex(p(HGNC:EGFR, pmod(pa))) -> '
    expected_stmt = "complex(p(HGNC:EGFR, pmod(pa))) -> "

    result = bel.lang.completion.bel_completion(belstr, cursor_loc=32, bel_fmt='short')
    print('Completions:\n', json.dumps(result, indent=4))

    labels = sorted(c['label'] for c in result["completions"])
    assert labels == ["--", "->", "-|"]

    # Exactly one suggestion reproduces the original statement
    replacements = [c['replacement'] for c in result["completions"]]
    assert replacements.count(expected_stmt) == 1

    assert result['completions'][0]['cursor_loc'] == 34
    assert result['entity_spans'] == []
    assert result['function_help'] == []


def test_completion_nested_relation_end():
    """A relation prefix at the end of an unclosed nested statement is completed."""

    belstr = 'complex(p(HGNC:EGFR, pmod(pa))) increases (p(HGNC:EGF) dec'
    result = bel.lang.completion.bel_completion(belstr, bel_fmt='medium')
    print('Completions:\n', json.dumps(result, indent=4))

    assert result["completion_text"] == "dec"

    # Only the first suggestion is pinned by this test
    top = result['completions'][0]
    assert top['replacement'] == "complex(p(HGNC:EGFR, pmod(pa))) increases (p(HGNC:EGF) decreases"
    assert top['cursor_loc'] == 64

    assert result['entity_spans'] == []
    assert result['function_help'] == []


def test_completion_nested_relation():
    """With the cursor inside the nested relation, the text up to the cursor is the completion text."""

    belstr = 'complex(p(HGNC:EGFR, pmod(pa))) increases (p(HGNC:EGF) decreases p(HGNC:AKT1))'
    result = bel.lang.completion.bel_completion(belstr, cursor_loc=60, bel_fmt='medium')
    print('Completions:\n', json.dumps(result, indent=4))

    assert result["completion_text"] == "decrea"

    # The first suggestion reproduces the full statement
    top = result['completions'][0]
    assert top['replacement'] == "complex(p(HGNC:EGFR, pmod(pa))) increases (p(HGNC:EGF) decreases p(HGNC:AKT1))"
    assert top['cursor_loc'] == 64

    assert result['entity_spans'] == []
    assert result['function_help'] == []
2 changes: 1 addition & 1 deletion tests/lang/test_semantic_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def test_valid_statements():
# VALID STATEMENT TEST CASES #
##############################
@pytest.mark.skip(reason="Skip for now - have Github Issue to fix it")
def test_arg_values():
def test_complex_nsarg():
stmts = [
'activity(complexAbundance(SCOMP:"TORC2 Complex"), molecularActivity(DEFAULT:kin))'
]
Expand Down

0 comments on commit 36eb166

Please sign in to comment.