In [None]:
from semantic_parsing_with_constrained_lm.scfg.scfg import SCFG
from semantic_parsing_with_constrained_lm.scfg.generate import parse_and_render



In [None]:
# Load the SCFG from a grammar file on disk.
# NOTE(review): the path is absolute and user-specific, so this cell only runs
# on a machine that has this exact file.
scfg = SCFG.from_file("/home/estengel/scratch/scfg_playground/pp.scfg")

In [None]:
# Keep a handle on the SCFG's utterance grammar; it is the grammar that
# generate() is called with further down.
string_grammar = scfg.utterance_grammar

In [None]:
from semantic_parsing_with_constrained_lm.scfg.parser.token import TerminalToken, NonterminalToken

def expand(grammar, nonterminal):
    """Collect the first component of every entry stored under ``nonterminal``.

    ``grammar`` is indexed with ``nonterminal``; each entry found there is
    indexed with ``[0]`` and the results are returned as a list, in iteration
    order.
    """
    heads = []
    for entry in grammar[nonterminal]:
        heads.append(entry[0])
    return heads

def check_all_terminal(sequence):
    """Tell whether ``sequence`` is non-empty and holds only ``TerminalToken``s.

    Returns:
        bool: False for an empty sequence or as soon as one element is not a
        ``TerminalToken`` instance; True otherwise.
    """
    if not len(sequence):
        return False
    return all(isinstance(tok, TerminalToken) for tok in sequence)

def generate(grammar, symbol, strings=None):
    """Recursively expand ``symbol`` against ``grammar`` into a nested structure.

    Args:
        grammar: the grammar handed to ``expand``; it is indexed by the
            ``underlying`` value of nonterminals (or by ``symbol`` itself when
            ``symbol`` is a ``str``).
        symbol: where to start. May be a ``TerminalToken``, a
            ``NonterminalToken``, a ``str`` used directly as a grammar key, or
            a ``tuple``/``list`` of such symbols.
        strings: unused. Kept only so existing calls that pass it keep
            working; the default is ``None`` rather than a shared mutable list.

    Returns:
        A nested structure, not a flat list of strings:
        * for a terminal, its ``underlying`` value;
        * for a nonterminal or ``str``, a list with one entry per option
          returned by ``expand``;
        * for a ``tuple``/``list``, a list with one entry per element.

    Raises:
        ValueError: if a symbol of any other type is encountered.

    Note:
        There is no cycle detection, so a grammar whose rules refer back to
        themselves will recurse until Python's recursion limit is hit.
    """

    def helper(current):
        # Terminal: contributes its underlying value as-is.
        if isinstance(current, TerminalToken):
            return current.underlying

        # Nonterminal: expand every option registered under its name.
        if isinstance(current, NonterminalToken):
            print(f"expanding {current}")
            return [helper(option) for option in expand(grammar, current.underlying)]

        # Bare string: treated as a grammar key (this is how the start symbol is passed).
        if isinstance(current, str):
            return [helper(option) for option in expand(grammar, current)]

        # An expansion is itself a sequence of symbols; exact-type check on
        # purpose, so tuple/list subclasses are not treated as sequences.
        if type(current) in (tuple, list):
            return [helper(tok) for tok in current]

        raise ValueError(f"Invalid symbol type: {type(current)}")

    return helper(symbol)

In [None]:

# Expand the grammar starting from the 'Ambig_PP_sentence_' key and peek at the
# second top-level entry of the nested structure that comes back.
strings_to_sample = generate(string_grammar, 'Ambig_PP_sentence_')
print(strings_to_sample[1])

In [None]:
# TODO: take the product of the nested lists here to sample 

class EnumSampler:
    """Deterministic sampler: always picks the first not-yet-seen option."""

    def __call__(self, s, done):
        """Return the first element of ``s`` that is not in ``done``.

        Returns ``None`` when every element of ``s`` is already in ``done``
        (or ``s`` is empty).
        """
        return next((option for option in s if option not in done), None)

# Module-level record of everything picked so far; sample() passes it to the
# sampler and appends each new pick to it.
done = []

def reduce_singleton(s):
    """Flatten ``s`` by one level when it is a list of one-item string holders.

    Args:
        s: a sized, indexable collection whose elements are themselves sized
            and indexable.

    Returns:
        * ``[s[0]]`` when ``s`` has exactly one element (a fresh one-item
          list; the element itself is not unwrapped);
        * a flat list of the inner items when every element of ``s`` has
          length 1 and its only item is exactly of type ``str``;
        * ``s`` itself, unchanged, in every other case.
    """
    if len(s) == 1:
        return [s[0]]
    # Test the length before touching x[0]: an empty element now simply makes
    # the condition false instead of raising IndexError.
    if all(len(x) == 1 and type(x[0]) is str for x in s):
        return [item for x in s for item in x]
    return s

# Module-level list that sample() appends every pick to, in the order made.
sampled = []

def sample(nested_strs, sampler):
    """Walk ``nested_strs`` and record one pick per group of strings.

    Each element is passed through ``reduce_singleton``. If the result starts
    with a ``str``, ``sampler(result, done)`` chooses an item, which is
    appended to the module-level ``done`` and ``sampled`` lists; otherwise the
    function recurses into the result.

    The walk at the current level stops at the first empty element or as soon
    as the sampler returns ``None``. The result of a recursive call is
    ignored, so an early stop inside a nested level does not stop the outer
    level. The function always returns ``None``; results are read from
    ``sampled``.
    """
    print("Nest, ", nested_strs)
    for group in nested_strs:
        if not len(group):
            return None
        # Collapse lists of one-item string lists into a flat list of strings.
        candidates = reduce_singleton(group)
        if not isinstance(candidates[0], str):
            # Still nested: descend one level.
            sample(candidates, sampler)
            continue
        choice = sampler(candidates, done)
        if choice is None:
            return None
        done.append(choice)
        sampled.append(choice)

# Walk the generated structure with an EnumSampler. sample() only ever returns
# None, so the first print shows None; the actual picks are in `sampled`.
print(sample(strings_to_sample, EnumSampler()))
print(sampled)

In [None]:
# Quick check of reduce_singleton on a list of one-element string lists.
reduce_singleton([['"the boy"'], ['"Galileo"'], ['"the girl"'], ['"the man"']])

In [None]:

# let's test grammar on parsing

# The leading and trailing spaces are part of the input as written.
input_str = " the boy observed Galileo with binoculars "
# The third positional argument is False; its meaning is defined by
# parse_and_render, which is not visible in this notebook.
interpretation = parse_and_render(scfg, input_str, False)

In [53]:
import copy
import json

# One ambiguous sentence and two candidate logical forms for it.
input_str_pp = "The man saw the boy with the telescope"
# output_str_pp1 = "(let (e1 SAW e2 HAVE x BOY) (AND (AGENT e1 MAN) ( PATIENT e1 x ) ( AGENT e2 x ) ( PATIENT e2 TELESCOPE )))"
output_str_pp1 = "(let (e1 SAW e2 HAVE x BOY) (AND (AGENT e1 MAN) ( PATIENT e1 x ) ))"
output_str_pp2 = "(let (e1 SAW) (AND ((AGENT e1 MAN) ( PATIENT e1 BOY ) ( INSTRUMENT e1 TELESCOPE ) ) ))"
# output_str_pp1 = "(Yield (FindEventWrapperWithDefaults (EventOnDateWithTimeRange (EventOnDate (NextDOW (Wednesday)) (^(Event) EmptyStructConstraint)) (Afternoon))))"
# output_str_pp2 = "(Yield (FindEventWrapperWithDefaults (EventOnDateWithTimeRange (EventOnDate (NextDOW (Wednesday)) (^(Event) EmptyStructConstraint)) (Afternoon))))"

# Blank two-turn dialogue record; the per-example fields are filled in below.
# NOTE(review): both turns carry "turn_index": 0 — confirm whether the second
# turn is meant to be 1.
template = {"dialogue_id": "None",
            "turns": [{"fully_typed_lispress": None,
                       "lispress": None,
                       "program_execution_oracle": {"has_exception": False, "refer_are_correct": True},
                       "skip": False,
                       "turn_index": 0,
                       "user_utterance": {"original_text": "",
                                          "tokens": []},
                       "agent_utterance": {"described_entities": [], "original_text": "hello", "tokens": ["hello"]}
                       },
                      {"fully_typed_lispress": None,
                       "lispress": None,
                       "program_execution_oracle": {"has_exception": False, "refer_are_correct": True},
                       "skip": False,
                       "turn_index": 0,
                       "user_utterance": {"original_text": "",
                                          "tokens": []},
                       "agent_utterance": {"described_entities": [], "original_text": "hello", "tokens": ["hello"]}
                       }]
            }

# Deep-copy the template: a shallow copy would share the nested "turns" dicts,
# so filling in the entry would silently overwrite the template itself.
entry_0 = copy.deepcopy(template)
entry_0['dialogue_id'] = "00"

# Turn 0 gets the first logical form, turn 1 the second; both share the utterance.
for turn, lispress in zip(entry_0['turns'], (output_str_pp1, output_str_pp2)):
    turn['user_utterance']['original_text'] = input_str_pp
    turn['user_utterance']['tokens'] = input_str_pp.split(" ")
    turn['fully_typed_lispress'] = lispress
    turn['lispress'] = lispress

# Write the single record as one JSON line.
# NOTE(review): absolute, user-specific output path.
with open("/home/estengel/semantic_parsing_with_constrained_lm/src/semantic_parsing_with_constrained_lm/domains/ambig/data/example.jsonl", "w") as f1:
    f1.write(json.dumps(entry_0) + "\n")



In [54]:
import os
# Ask for protobuf's "python" implementation through the environment.
# NOTE(review): this is set before the import below, presumably because the
# setting is read when protobuf is first imported — confirm before reordering.
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION']  = "python"
from semantic_parsing_with_constrained_lm.domains.ambig.create_benchclamp_data import extract_and_write_grammar

# Run grammar extraction on the example.jsonl file written earlier in this
# notebook. The second argument is presumably the output grammar directory
# (TODO confirm against extract_and_write_grammar).
# The recorded run of this cell ended with "ValueError: Could not parse: ...".
extract_and_write_grammar(
                "/home/estengel/semantic_parsing_with_constrained_lm/src/semantic_parsing_with_constrained_lm/domains/ambig/data/example.jsonl",
                "/home/estengel/semantic_parsing_with_constrained_lm/src/semantic_parsing_with_constrained_lm/domains/ambig/grammar",
                whitelisted_dialogue_ids=None,
            )




[A[A[A

Extracting grammar ...


ValueError: Could not parse: ['AND', ['AGENT', 'e1', 'MAN'], ['PATIENT', 'e1', 'x']]

In [48]:
from dataflow.core.lispress import parse_lispress

# Check how parse_lispress reads a variant of the first logical form that uses
# `&` with all four conjuncts wrapped in one extra pair of parentheses.
parse_lispress("(let (e1 SAW e2 HAVE x BOY) (& ((AGENT e1 MAN) ( PATIENT e1 x ) ( AGENT e2 x ) ( PATIENT e2 TELESCOPE ))))") 

['let',
 ['e1', 'SAW', 'e2', 'HAVE', 'x', 'BOY'],
 ['&',
  [['AGENT', 'e1', 'MAN'],
   ['PATIENT', 'e1', 'x'],
   ['AGENT', 'e2', 'x'],
   ['PATIENT', 'e2', 'TELESCOPE']]]]

In [None]:
(let (e1 SAW) (& ((AGENT e1 MAN) ( PATIENT e1 BOY ) ( INSTRUMENT e1 TELESCOPE ) ) ))

(& ( (AGENT e1 MAN) ( PATIENT e1 x ) ( AGENT e2 x ) ( PATIENT e2 TELESCOPE )))

(let (e1 SAW e2 HAVE x BOY) (& ((AGENT e1 MAN) ( PATIENT e1 x ) ( AGENT e2 x ) ( PATIENT e2 TELESCOPE ))))