In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to the project sources are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
!pip install pygraphviz



In [3]:
from incremental_parsing.lex_earley.middle_earley import create_middle_bnf, create_parse_hierarchy
from incremental_parsing.lex_earley.lark_grammar import get_calc_lang_context
from incremental_parsing.lex_earley.lex_earley import lex_earley_init, lex_earley_run, _to_suffix_parser_state, force_eof
from incremental_parsing.lex_earley.earley_nfa import EarleyNFA, tokens_to_nfa
import networkx as nx

In [4]:
# Shared parsing context for the demo "calc" language; its grammar
# (context.grammar) is what the later cells build hierarchies and BNFs from.
context = get_calc_lang_context()
# Starting state handed to lex_earley_run when the prefix is processed.
init_state = lex_earley_init(context)

In [5]:
# Tunable inputs for the cells below.
# Text run through lex_earley_run starting from init_state.
prefix = "(1"
# Text used to build the suffix parser state and then run through lex_earley_run.
suffix = ""
# Index into pre.branches: which branch of the prefix run to inspect.
pre_branch_num = 0
# Index into post_eof.branches: which branch of the suffix run (after force_eof) to inspect.
post_branch_num = 0

In [6]:
# ---- Prefix side: run the prefix, then build and render its parse hierarchy ----
pre = lex_earley_run(context=context, state=init_state, value=prefix)
pre_charts = pre.branches[pre_branch_num].parser_state.earley_trie.charts
# The hierarchy is rooted at the last chart of the selected prefix branch.
pre_final_indices = (len(pre_charts) - 1,)
g_pre = create_parse_hierarchy(context.grammar, pre_charts, pre_final_indices)

# Formalization assumes arrows go in one direction, implementation has arrows
# go in other direction, hence the .reverse() before drawing.
ag = nx.nx_agraph.to_agraph(g_pre.reverse())
ag.layout(prog="dot")
ag.draw("pre.png")

# ---- Suffix side: switch to a suffix parser state, run the suffix, force EOF ----
suf = _to_suffix_parser_state(
    context=context,
    state=pre,
    suffix=suffix,
    make_dummy_trie=True,
)
post = lex_earley_run(context=context, state=suf, value=suffix)
post_eof = force_eof(context, post)
branch = post_eof.branches[post_branch_num]

# The suffix tokens are read back in reverse order and turned into a token NFA,
# which is then parsed against the reversed grammar.
suffix_trie = branch.suffix_state.parser_state.earley_trie
tokens_reverse = list(suffix_trie.get_reverse_token_sequence())
token_nfa, final_states = tokens_to_nfa(tokens_reverse)
reversed_grammar = context.grammar.reverse()
earley_nfa = EarleyNFA(reversed_grammar, token_nfa)

# Note: the hierarchy itself is built with the original (non-reversed) grammar,
# with reverse_state_positions=True.
g_post = create_parse_hierarchy(
    context.grammar,
    earley_nfa.charts,
    final_states,
    reverse_state_positions=True,
)

ag = nx.nx_agraph.to_agraph(g_post.reverse())
ag.layout(prog="dot")
ag.draw("post.png")


In [7]:
# Build a BNF from the two parse hierarchies (g_pre for the prefix, g_post for
# the suffix), using the last prefix chart and the suffix NFA's final states.
left_charts = pre.branches[pre_branch_num].parser_state.earley_trie.charts
left_final_indices = (len(left_charts) - 1,)
r = create_middle_bnf(context.grammar, g_pre, g_post, left_final_indices, final_states)

# Show the resulting rules, then the names of its top-level rules.
print(r)
print(r.top_level_rules)

comma_expr : COMMA expression
 | COMMA expression comma_expr

expression : LPAR expression RPAR
 | LPAR expression comma_expr RPAR
 | expression binop expression
 | unop expression
 | NUMBER

binop : MINUS
 | PLUS
 | STAR
 | SLASH

unop : MINUS

start<0-> : expression<0->

expression<0-> : RPAR
 | comma_expr RPAR
 | expression<0-> binop expression
 | expression<1-> RPAR
 | expression<1-> comma_expr RPAR

expression<1-> : binop expression
 | expression<1-> binop expression
('start<0->',)


In [8]:
from incremental_parsing.lex_earley.middle_earley import create_bnf_direct

# Build a BNF directly from the prefix charts (no intermediate parse hierarchy),
# treating them as left context (is_right_context=False).
relevant_charts = pre.branches[pre_branch_num].parser_state.earley_trie.charts
last_chart_index = len(relevant_charts) - 1
r = create_bnf_direct(
    grammar=context.grammar,
    final_chart_indices=(last_chart_index,),
    charts=relevant_charts,
    is_right_context=False,
)

# Show the resulting rules, then the names of its top-level rules.
print(r)
print(r.top_level_rules)

comma_expr : COMMA expression
 | COMMA expression comma_expr

expression : LPAR expression RPAR
 | LPAR expression comma_expr RPAR
 | expression binop expression
 | unop expression
 | NUMBER

binop : MINUS
 | PLUS
 | STAR
 | SLASH

unop : MINUS

expression<1-> : binop<2-> expression
 | λ
 | binop expression
 | expression<1-> binop expression

expression<0-> : RPAR
 | expression<1-> RPAR
 | expression<1-> comma_expr RPAR
 | expression<0-> binop expression
 | comma_expr RPAR
 | comma_expr<2-> RPAR

comma_expr<2-> : COMMA expression
 | COMMA expression comma_expr

binop<2-> : SLASH
 | PLUS
 | MINUS
 | STAR

start<0-> : expression<0->
('start<0->',)
