In [2]:
from lark import Lark, Tree, Token
json_parser = Lark(r"""
    pair: pair "," pair // 1
         | string // 2
    string : "a" // 3
        | "b" // 4

    %import common.WS
    %ignore WS
    """, start='pair', keep_all_tokens=True)

text = 'a'
ast = json_parser.parse(text)
print(ast)
print(ast.pretty())
# rule seq
rule_seq = ['pair', 'string', "1"]
rule_seq2 = ['pair->string', 'string->1']

text = "a, b"
ast = json_parser.parse(text)
print(ast)
print(ast.pretty())
rule_seq2 = ['pair->pair "," pair', 'pair->string', 'pair->string', 'string->a', 'string->b']
rule_seq2 = [rule.split('->') for rule in rule_seq2]
rule_seq3 = [1, 2, 2, 3, 4]

non_terminals = ['pair', 'string']
terminals = [",", "a", "b"]

def is_terminal(sym):
    # true if matches hardcoded symbols in grammar or a regex, note this only works if the nt has been checked first.
    return sym in terminals  # or matches regex

def is_non_terminal(sym):
    return sym in non_terminals

def build_lark_tree(rule_seq:list[tuple]) -> Tree:
    print(rule_seq)
    nt, next_syms = rule_seq[0]
    if len(rule_seq) == 1:
        return Tree(nt, [Token('literal', next_syms)])
    else:
        rule_seq = rule_seq[1:]
        next_syms = next_syms.split(" ")
        asts = []
        nt_idx = 0
        for next_sym in next_syms:
            if is_non_terminal(next_sym):
                ast = Tree(next_sym, build_lark_tree(rule_seq[nt_idx:]))
                nt_idx += 1
            elif is_terminal(next_sym):
                ast = Token('literal', next_sym)
            else:
                raise ValueError(f'Invalid: {next_sym} didnt match anything')
            asts.append(ast)
        return Tree(nt, asts)

print('---- generating ast from Rule Seq')
build_lark_tree(rule_seq2)

Tree('pair', [Tree('string', [Token('A', 'a')])])
pair
  string	a

Tree('pair', [Tree('pair', [Tree('string', [Token('A', 'a')])]), Token('COMMA', ','), Tree('pair', [Tree('string', [Token('B', 'b')])])])
pair
  pair
    string	a
  ,
  pair
    string	b

---- generating ast from Rule Seq
[['pair', 'pair "," pair'], ['pair', 'string'], ['pair', 'string'], ['string', 'a'], ['string', 'b']]


KeyboardInterrupt: 

In [7]:
from typing import Union

from collections import deque

from lark import Lark, Tree, Token
from lark.grammar import Rule, NonTerminal, Symbol, Terminal
from lark.reconstruct import Reconstructor

grammar = r"""
    pair: pair "," pair // 1
         | string // 2
    string : "a" // 3
        | "b" // 4

    %import common.WS
    %ignore WS
    """
parser = Lark(grammar, start='pair', keep_all_tokens=True)
print(parser.rules)
print()
print(parser.rules[0])

[Rule(NonTerminal('pair'), [NonTerminal('pair'), Terminal('COMMA'), NonTerminal('pair')], None, RuleOptions(True, False, None, None)), Rule(NonTerminal('pair'), [NonTerminal('string')], None, RuleOptions(True, False, None, None)), Rule(NonTerminal('string'), [Terminal('A')], None, RuleOptions(True, False, None, None)), Rule(NonTerminal('string'), [Terminal('B')], None, RuleOptions(True, False, None, None))]

<pair : pair COMMA pair>
