In [14]:
from semantic_parsing_with_constrained_lm.scfg.scfg import SCFG
from semantic_parsing_with_constrained_lm.scfg.generate import parse_and_render



In [15]:
scfg = SCFG.from_file("/home/estengel/scratch/scfg_playground/pp.scfg")

In [16]:
string_grammar = scfg.utterance_grammar

In [37]:
from semantic_parsing_with_constrained_lm.scfg.parser.token import TerminalToken, NonterminalToken

def expand(grammar, nonterminal):
    return [x[0] for x in grammar[nonterminal]]

def check_all_terminal(sequence):
    if len(sequence) == 0:
        return False
    for s in sequence:
        if not isinstance(s, TerminalToken):
            return False
    return True 

def generate(grammar, symbol, strings=[]): 
    """Generate all possible strings from a lark CFG
    
    Args:
        lark_grammar (lark.Lark): a lark grammar object
    Returns:
        list of str: all possible strings
    """
    
    def helper(grammar, symbol):
        # case 1: symbol is terminal
        if isinstance(symbol, TerminalToken):
            # add to current string
            return symbol.underlying
            
        # case 2: symbol is nonterminal
        elif isinstance(symbol, NonterminalToken):
            # expand and repeat 
            print(f"expanding {symbol}")
            return [helper(grammar, x) for x in expand(grammar, symbol.underlying)]

        elif isinstance(symbol, str):
            return [helper(grammar, x) for x in expand(grammar, symbol)]
        # case 3: expansion rule has created sequence of options 
        elif type(symbol) in [tuple,list]:
            return [helper(grammar, tok) for tok in symbol]
        else:

            raise ValueError(f"Invalid symbol type: {type(symbol)}")

    strings = helper(grammar, symbol)
    return strings 

In [29]:

strings_to_sample = generate(string_grammar, 'Ambig_PP_sentence_')

expanding Ambig_PP_sentence_
[(NonterminalToken(underlying='NP_animate', optional=False), NonterminalToken(underlying='V_observe', optional=False), NonterminalToken(underlying='NP_animate', optional=False), NonterminalToken(underlying='PP_visual_instr', optional=False)), (NonterminalToken(underlying='NP_animate', optional=False), NonterminalToken(underlying='V_observe', optional=False), NonterminalToken(underlying='NP_PP_visual_instr', optional=False))]
expanding NonterminalToken(underlying='NP_animate', optional=False)
expanding NonterminalToken(underlying='V_observe', optional=False)
expanding NonterminalToken(underlying='NP_animate', optional=False)
expanding NonterminalToken(underlying='PP_visual_instr', optional=False)
expanding NonterminalToken(underlying='NP_visual_instr', optional=False)
expanding NonterminalToken(underlying='NP_animate', optional=False)
expanding NonterminalToken(underlying='V_observe', optional=False)
expanding NonterminalToken(underlying='NP_PP_visual_instr'

In [50]:
# TODO: take the produce of the nested lists here to sample 

class EnumSampler:
    """Sample everything that hasn't already been sampled"""
    def __call__(self, s, done):
        for x in s:
            if x not in done:
                return x
        return None

done = []

def reduce_singleton(s):
    if len(s) == 1:
        return [s[0]]
    lens = [len(x) for x in s]
    types = [type(x[0]) for x in s]
    if all([x == 1 for x in lens]) and all([x == str for x in types]):
        return [x for l in s for x in l ]
    return s 

sampled = []

def sample(nested_strs, sampler):
    # nested_strs is list of lists, where we can keep going down until we get to a terminal (str)
    print("Nest, ", nested_strs)
    for s in nested_strs:
        if len(s) == 0:
            return None 
        # reduce singleton lists 
        s = reduce_singleton(s)
        if isinstance(s[0], str):
            samp = sampler(s, done)
            if samp is None:
                return None
            done.append(samp)
            sampled.append(samp)
        else:
            sample(s, sampler) 



print(sample(strings_to_sample, EnumSampler()))
print(sampled)

Nest,  [[[['"the boy"'], ['"Galileo"'], ['"the girl"'], ['"the man"']], [['" observed "'], ['" saw "'], ['" spotted "']], [['"the boy"'], ['"Galileo"'], ['"the girl"'], ['"the man"']], [['"with a "', [['"binoculars"'], ['"opera glasses"'], ['"a telescope"']]]]], [[['"the boy"'], ['"Galileo"'], ['"the girl"'], ['"the man"']], [['" observed "'], ['" saw "'], ['" spotted "']], [[[['"the boy"'], ['"Galileo"'], ['"the girl"'], ['"the man"']], '" with a "', [['"binoculars"'], ['"opera glasses"'], ['"a telescope"']]]]]]
Nest,  [[['"the boy"'], ['"Galileo"'], ['"the girl"'], ['"the man"']], [['" observed "'], ['" saw "'], ['" spotted "']], [['"the boy"'], ['"Galileo"'], ['"the girl"'], ['"the man"']], [['"with a "', [['"binoculars"'], ['"opera glasses"'], ['"a telescope"']]]]]
Nest,  [['"with a "', [['"binoculars"'], ['"opera glasses"'], ['"a telescope"']]]]
Nest,  [[['"the boy"'], ['"Galileo"'], ['"the girl"'], ['"the man"']], [['" observed "'], ['" saw "'], ['" spotted "']], [[[['"the boy"']

In [44]:
reduce_singleton([['"the boy"'], ['"Galileo"'], ['"the girl"'], ['"the man"']])

[1, 1, 1, 1] [<class 'str'>, <class 'str'>, <class 'str'>, <class 'str'>]


['"the boy"', '"Galileo"', '"the girl"', '"the man"']

In [1]:
def dfs_generate(grammar, start_symbol):


    discovered = []
    nodes_list = []
    def dfs_helper(grammar, node):
        discovered.append(node)
        if isinstance(node, TerminalToken):
            nodes_list.append(node.underlying)

        elif isinstance(node, NonterminalToken):
            name = node.underlying
            expansions = expand(grammar, name)
            for expansion_tuple in expansions:
                for expansion_node in expansion_tuple:
                    if expansion_node not in discovered:
                        dfs_helper(grammar, expansion_node)

        elif isinstance(node, str):
            name = node
            expansions = expand(grammar, name)
            for expansion_tuple in expansions:
                for expansion_node in expansion_tuple:
                    if expansion_node not in discovered:
                        dfs_helper(grammar, expansion_node)

    dfs_helper(grammar, start_symbol)
    return nodes_list

In [13]:
def iter_dfs(grammar, start_symbol):
    # frontier is a stack 
    frontier = []
    frontier.append(start_symbol)
    discovered = []
    visited = []
    while len(frontier) > 0:
        s = frontier[-1]
        frontier.pop()
        if s not in discovered:
            try:
                discovered.append(s.underlying)
            except AttributeError:
                discovered.append(s)
            if isinstance(s, TerminalToken):
                print(s.underlying)
            elif isinstance(s, NonterminalToken):
                expansions = expand(grammar, s.underlying)
                for expansion_tuple in expansions:
                    for expansion_node in expansion_tuple:
                        frontier.append(expansion_node)
            elif isinstance(s, str):
                expansions = expand(grammar, s)
                for expansion_tuple in expansions:
                    for expansion_node in expansion_tuple:
                        frontier.append(expansion_node)

iter_dfs(string_grammar, "Ambig_PP_sentence_")

"a telescope"
"opera glasses"
"binoculars"
" with a "
"the man"
"the girl"
"Galileo"
"the boy"
" spotted "
" saw "
" observed "
"the man"
"the girl"
"Galileo"
"the boy"
"a telescope"
"opera glasses"
"binoculars"
"with a "
"the man"
"the girl"
"Galileo"
"the boy"
" spotted "
" saw "
" observed "
"the man"
"the girl"
"Galileo"
"the boy"
