## Recasting

- This time, with and without prompting 
- Instructions: 
    - Please answer the following yes-no question about this sentence: <Sentence>
- Volition: 
    - ``In the event "<event>", did the <arg> act on purpose?``
    - Need to edit events and args so that the resulting questions are grammatical
- Change of state
    - ``In the event "<event>", did the state of <arg> change?``






In [1]:
from decomp import UDSCorpus 


# Build the corpus object with default arguments. The cells below iterate over
# it to obtain graph names and index it by name to obtain each graph.
# NOTE(review): presumably this loads the full UDS dataset — confirm against
# the decomp documentation.
c = UDSCorpus()

In [2]:
import sys 
from tqdm import tqdm 
# Accumulators filled by the extraction loop further down: one tuple per
# semantics edge whose annotation passes the value filter.
volition_examples, change_of_state_examples = [], []


def extract_prompt_info(graph, edge):
    """Look up the head entry for both endpoints of ``edge``.

    Args:
        graph: object exposing ``head(node)``; the value at ``[1][0]`` of its
            result is used (presumably the head token's surface form —
            confirm against the decomp API).
        edge: a two-element ``(predicate_node, argument_node)`` pair.

    Returns:
        ``(pred, arg)``, or ``(None, None)`` when either lookup raises
        ``IndexError``.
    """
    source, target = edge
    try:
        # Evaluated left to right: predicate first, then argument.
        heads = tuple(graph.head(node)[1][0] for node in (source, target))
    except IndexError:
        return None, None
    return heads

# Minimum absolute annotation value for an edge to be kept. The comparison
# below is strict, so values with abs(value) <= min_abs_value are dropped.
min_abs_value = 1

# Protorole property name -> list that collects the examples for it.
example_lists = {
    "volition": volition_examples,
    "change_of_state": change_of_state_examples,
}

# Walk every graph in the corpus and record, for each semantics edge, the
# protorole annotations that pass the threshold. Each record is
# (graph name, edge, predicate head, argument head, sentence, annotation dict).
for gname in tqdm(c):
    g = c[gname]
    sent = g.sentence
    try:
        subg = g.semantics_subgraph
    except AttributeError:
        # No semantics subgraph available for this graph: nothing to extract.
        continue
    if len(subg.nodes) == 0:
        continue
    for edge in subg.edges:
        edge_props = subg.edges[edge]
        if "protoroles" not in edge_props:
            continue
        protoroles = edge_props['protoroles']
        for prop_name, examples in example_lists.items():
            if prop_name not in protoroles:
                continue
            annotation = protoroles[prop_name]
            if abs(annotation['value']) > min_abs_value:
                pred, arg = extract_prompt_info(g, edge)
                examples.append((gname, edge, pred, arg, sent, annotation))


100%|██████████| 16622/16622 [00:03<00:00, 4582.53it/s]


In [3]:

class _YesNoTemplate:
    """Shared rendering logic for the yes/no question templates.

    Subclasses only supply ``question``, a ``str.format`` pattern with the
    placeholders ``{pred}`` and ``{arg}``.

    Attributes are set from the constructor arguments:
        sent:  the sentence shown to the model.
        pred:  the event/predicate text inserted into the question.
        arg:   the participant text inserted into the question.
        value: annotation value; ``None`` leaves the answer blank, a value
               greater than 0 renders "Yes", anything else renders "No".
    """

    # Overridden by every concrete template.
    question = None

    def __init__(self, sent, pred, arg, value=None):
        self.sent = sent
        self.pred = pred
        self.arg = arg
        self.value = value

    def __str__(self):
        question = self.question.format(pred=self.pred, arg=self.arg)
        text = f"Sentence: \"{self.sent}\"\nQuestion: {question}\nAnswer: "
        if self.value is not None:
            text += "Yes" if self.value > 0 else "No"
        return text


class VolitionTemplate(_YesNoTemplate):
    """Volition question, phrased with the word "volition"."""

    question = 'In the event "{pred}", does the participant "{arg}" act with volition?'


class ChangeOfStateTemplate(_YesNoTemplate):
    """Change-of-state question, phrased as "does the state of ... change?"."""

    question = 'In the event "{pred}", does the state of the participant "{arg}" change?'


class VolitionTemplate2(_YesNoTemplate):
    """Volition question, paraphrased as "act on purpose"."""

    question = 'In the event "{pred}", does the participant "{arg}" act on purpose?'


class ChangeOfStateTemplate2(_YesNoTemplate):
    """Change-of-state question, paraphrased as "change in state"."""

    question = 'In the event "{pred}", does the participant "{arg}" change in state?'

In [4]:
import re

# limit sentences to 35 tokens to avoid overly complicated ones
max_len = 35

# Raw string: "\s" in a plain string literal is an invalid escape sequence.
# Compiled once instead of being re-specified on every iteration.
whitespace_re = re.compile(r"\s+")

volition_templates1 = []
change_of_state_templates1 = []
volition_templates2 = []
change_of_state_templates2 = []

# Every kept example is rendered with both phrasings of its question, so the
# "1" and "2" lists stay parallel (same examples, same order).
for example in volition_examples:
    gname, edge, pred, arg, sent, val_dict = example
    # Token count = number of pieces after splitting on whitespace runs.
    if len(whitespace_re.split(sent)) > max_len:
        continue
    value = val_dict['value']
    volition_templates1.append(VolitionTemplate(sent, pred, arg, value))
    volition_templates2.append(VolitionTemplate2(sent, pred, arg, value))

for example in change_of_state_examples:
    gname, edge, pred, arg, sent, val_dict = example
    if len(whitespace_re.split(sent)) > max_len:
        continue
    value = val_dict['value']
    change_of_state_templates1.append(ChangeOfStateTemplate(sent, pred, arg, value))
    change_of_state_templates2.append(ChangeOfStateTemplate2(sent, pred, arg, value))

In [5]:
import numpy as np

# Seed NumPy's global RNG; every shuffle/choice call below draws from it, so
# the order of these calls determines the result.
np.random.seed(12)

# Shuffle each template list in place.
np.random.shuffle(volition_templates1)
np.random.shuffle(change_of_state_templates1)
np.random.shuffle(volition_templates2)
np.random.shuffle(change_of_state_templates2)

# Split each list by the sign of the annotation value so the "yes" (value > 0)
# and "no" (value < 0) groups can be balanced. A value of exactly 0 would fall
# into neither group.
yes_volition_templates1 = [x for x in volition_templates1 if x.value > 0]
no_volition_templates1 = [x for x in volition_templates1 if x.value < 0]
yes_volition_templates2 = [x for x in volition_templates2 if x.value > 0]
no_volition_templates2 = [x for x in volition_templates2 if x.value < 0]

yes_cos_templates1 = [x for x in change_of_state_templates1 if x.value > 0]
no_cos_templates1 = [x for x in change_of_state_templates1 if x.value < 0]
yes_cos_templates2 = [x for x in change_of_state_templates2 if x.value > 0]
no_cos_templates2 = [x for x in change_of_state_templates2 if x.value < 0]

# Size of the smaller group per property. Computed from the "1" lists only and
# reused for the "2" lists in the sampling below.
min_volition = min(len(yes_volition_templates1), len(no_volition_templates1))
min_cos = min(len(yes_cos_templates1), len(no_cos_templates1))

# Draw that many items without replacement from every group, so each yes/no
# pair ends up with the same number of templates.
samp_yes_volition_templates1 = np.random.choice(yes_volition_templates1, size=min_volition, replace=False).tolist()
samp_no_volition_templates1 = np.random.choice(no_volition_templates1, size=min_volition, replace=False).tolist()
samp_yes_cos_templates1 = np.random.choice(yes_cos_templates1, size=min_cos, replace=False).tolist()
samp_no_cos_templates1 = np.random.choice(no_cos_templates1, size=min_cos, replace=False).tolist() 

# The "2" phrasings are sampled independently of the "1" phrasings.
samp_yes_volition_templates2 = np.random.choice(yes_volition_templates2, size=min_volition, replace=False).tolist()
samp_no_volition_templates2 = np.random.choice(no_volition_templates2, size=min_volition, replace=False).tolist()
samp_yes_cos_templates2 = np.random.choice(yes_cos_templates2, size=min_cos, replace=False).tolist()
samp_no_cos_templates2 = np.random.choice(no_cos_templates2, size=min_cos, replace=False).tolist() 


def interleave(l1,l2):
    """Return a new list alternating the items of ``l1`` and ``l2``.

    The result is ``[l1[0], l2[0], l1[1], l2[1], ...]``. Items are paired with
    ``zip``, so anything past the length of the shorter input is dropped.
    """
    return [item for pair in zip(l1, l2) for item in pair]

# Alternate the sampled "yes" and "no" templates (yes first) so that each
# balanced list holds equally many of both.
balanced_volition_templates1 = interleave(samp_yes_volition_templates1, samp_no_volition_templates1)
balanced_cos_templates1 = interleave(samp_yes_cos_templates1, samp_no_cos_templates1)
balanced_volition_templates2 = interleave(samp_yes_volition_templates2, samp_no_volition_templates2)
balanced_cos_templates2 = interleave(samp_yes_cos_templates2, samp_no_cos_templates2)

In [6]:
import pdb 
import copy 
import numpy as np 
# Re-seed the global NumPy RNG used by make_prompts' shuffle.
np.random.seed(31)

# Instruction line placed at the very start of every prompt.
instr_str = "Answer this yes-no question about the following sentence.\n"


def make_prompts(templates, num_prompts, max_prefix_size = 3, instructions = instr_str):
    """Build prompts with 0 .. ``max_prefix_size - 1`` answered examples each.

    ``templates`` is shuffled in place and then cut into ``max_prefix_size``
    consecutive, non-overlapping slices of ``num_prompts`` items. Slice 0
    supplies the query items, rendered with their answer removed. Slice ``j``
    (``j >= 1``) supplies the j-th answered example for the query at the same
    position. The prompt at prefix size ``k`` is::

        instructions + example_k + "\\n" + ... + example_1 + "\\n" + query

    Args:
        templates: list of template objects that have a ``value`` attribute and
            render via ``str()``. The list is shuffled in place using the
            global NumPy RNG.
        num_prompts: number of prompts to build per prefix size.
        max_prefix_size: number of prefix sizes to build (0 up to
            ``max_prefix_size - 1`` answered examples).
        instructions: text placed at the start of every prompt.

    Returns:
        A list of ``max_prefix_size`` lists. Entry ``k`` holds ``num_prompts``
        dicts with keys ``"prompt"`` and ``"correct_value"`` ("Yes"/"No").

    Raises:
        IndexError: if ``templates`` has fewer than
            ``max_prefix_size * num_prompts`` items.
    """
    np.random.shuffle(templates)

    # Slice k holds the templates used at depth k of the prompt.
    chosen = [
        templates[k * num_prompts:(k + 1) * num_prompts]
        for k in range(max_prefix_size)
    ]
    prompts = [[] for _ in range(max_prefix_size)]

    for i in range(num_prompts):
        bottom_prompt = chosen[0][i]
        correct_value = "Yes" if bottom_prompt.value > 0 else "No"
        # Blank out the answer that follows "Answer: " so the model has to
        # produce it.
        bottom_prompt_str = re.sub("(?<=(Answer: ))((Yes)|(No))", "", str(bottom_prompt))

        for k in range(max_prefix_size):
            # Answered examples run from the deepest slice down to slice 1,
            # followed by the unanswered query.
            parts = [str(chosen[j][i]) for j in range(k, 0, -1)]
            parts.append(bottom_prompt_str)
            body = "\n".join(parts)
            prompts[k].append({"prompt": f"{instructions}{body}", "correct_value": correct_value})

    return prompts

In [7]:
import json 

# max num prompts 
# Number of prefix sizes generated per task (0 .. num_prefix_sizes - 1 answered
# examples). Used for the per-size prompt budget, the max_prefix_size argument
# and the number of output files, so the three cannot drift apart.
num_prefix_sizes = 4

# Each prompt consumes one template per prefix level, so the number of prompts
# is the template count divided by the number of levels.
volition_max_prompts = len(balanced_volition_templates1) // num_prefix_sizes

volition_prompts1 = make_prompts(balanced_volition_templates1, volition_max_prompts, max_prefix_size=num_prefix_sizes)
volition_prompts2 = make_prompts(balanced_volition_templates2, volition_max_prompts, max_prefix_size=num_prefix_sizes)

cos_max_prompts = len(balanced_cos_templates1) // num_prefix_sizes

cos_prompts1 = make_prompts(balanced_cos_templates1, cos_max_prompts, max_prefix_size=num_prefix_sizes)
cos_prompts2 = make_prompts(balanced_cos_templates2, cos_max_prompts, max_prefix_size=num_prefix_sizes)

# Output file stem -> prompts indexed by prefix size.
prompt_sets = {
    "volition_1": volition_prompts1,
    "volition_2": volition_prompts2,
    "change_of_state_1": cos_prompts1,
    "change_of_state_2": cos_prompts2,
}

# One JSON file per (task phrasing, prefix size).
for k in range(num_prefix_sizes):
    for stem, prompt_set in prompt_sets.items():
        with open(f"../data/agent_patient/{stem}_prefix_{k}.json", "w") as f1:
            json.dump(prompt_set[k], f1)

118
