## Recasting

- This time, with and without prompting 
- Instructions: 
    - Please answer the following yes-no question about this sentence: <Sentence>
- Volition: 
    - ``In the event "<event>", did the <arg> act on purpose?``
    - Need to edit events and args to make them grammatical
- Change of state
    - ``In the event "<event>", did the state of <arg> change?``






In [None]:
from decomp import UDSCorpus 


# Load the 'dev' split of the UDS corpus; the cells below iterate over `c`
# by graph name and index into it to get each graph.
c = UDSCorpus(split='dev')

In [None]:
import sys 
from tqdm import tqdm 
# Accumulators filled by the extraction loop below. Each entry is a tuple:
# (graph name, edge, predicate head, argument head, sentence, protorole annotation dict).
volition_examples = []
change_of_state_examples = []


def extract_prompt_info(graph, edge):
    """Look up the head of the predicate node and of the argument node of an edge.

    Args:
        graph: object exposing ``head(node)``; element ``[1][0]`` of the
            returned value is used as the head
            (presumably the head word form -- TODO confirm against decomp).
        edge: a 2-item ``(pred_node, arg_node)`` pair.

    Returns:
        ``(pred, arg)``, or ``(None, None)`` as soon as either lookup raises
        ``IndexError`` (e.g. the node has no head entry).
    """
    pred_node, arg_node = edge
    heads = []
    for node in (pred_node, arg_node):
        try:
            heads.append(graph.head(node)[1][0])
        except IndexError:
            # A single failed lookup invalidates the whole pair.
            return None, None
    return tuple(heads)

# Walk every graph in the corpus and collect the semantic edges that carry a
# volition and/or change_of_state protorole annotation with |value| > 1.
# (property name, list that receives its examples) -- volition is handled
# first for each edge, then change_of_state.
protorole_targets = (
    ("volition", volition_examples),
    ("change_of_state", change_of_state_examples),
)

for gname in tqdm(c):
    g = c[gname]
    sent = g.sentence
    try:
        subg = g.semantics_subgraph
    except AttributeError:
        # Graph has no semantics subgraph attribute; nothing to extract.
        continue
    if len(subg.nodes) == 0:
        continue
    for edge in subg.edges:
        edge_props = subg.edges[edge]
        if "protoroles" not in edge_props:
            continue
        protoroles = edge_props['protoroles']
        for prop_name, bucket in protorole_targets:
            if prop_name not in protoroles:
                continue
            if abs(protoroles[prop_name]['value']) > 1:
                pred, arg = extract_prompt_info(g, edge)
                bucket.append((gname, edge, pred, arg, sent, protoroles[prop_name]))


In [None]:

class VolitionTemplate:
    """Yes/no volition question about one participant of an event in a sentence.

    Attributes set from the constructor arguments: ``sent`` (the sentence),
    ``pred`` (the event), ``arg`` (the participant) and ``value`` (numeric
    annotation, or ``None`` when the answer should not be rendered).
    """

    def __init__(self, sent, pred, arg, value=None):
        self.sent, self.pred, self.arg, self.value = sent, pred, arg, value

    def __str__(self):
        """Render the sentence, the question and, if ``value`` is set, the answer."""
        lines = [
            f'Sentence: "{self.sent}"',
            f'In the event "{self.pred}", does the participant "{self.arg}" act with volition?',
        ]
        if self.value is not None:
            # Strictly positive values answer "Yes"; zero or negative answer "No".
            lines.append("Yes" if self.value > 0 else "No")
        return "\n".join(lines)

class ChangeOfStateTemplate:
    """Yes/no change-of-state question about one participant of an event.

    Attributes set from the constructor arguments: ``sent`` (the sentence),
    ``pred`` (the event), ``arg`` (the participant) and ``value`` (numeric
    annotation, or ``None`` when the answer should not be rendered).
    """

    def __init__(self, sent, pred, arg, value=None):
        self.sent, self.pred, self.arg, self.value = sent, pred, arg, value

    def __str__(self):
        """Render the sentence, the question and, if ``value`` is set, the answer."""
        lines = [
            f'Sentence: "{self.sent}"',
            f'In the event "{self.pred}", does the state of the participant "{self.arg}" change?',
        ]
        if self.value is not None:
            # Strictly positive values answer "Yes"; zero or negative answer "No".
            lines.append("Yes" if self.value > 0 else "No")
        return "\n".join(lines)

In [None]:
import re

# Limit sentences to 35 whitespace-delimited pieces to avoid overly complicated ones.
max_len = 35

# Raw string: in a plain literal "\s" is an invalid escape sequence, which
# newer Python versions warn about. Compiled once and shared by both passes.
# Splitting (rather than str.split()) is kept so that the length count is
# exactly what it was before, including empty pieces from leading/trailing
# whitespace.
whitespace_pattern = re.compile(r"\s+")

# Each example is (gname, edge, pred, arg, sent, annotation dict); only the
# predicate, argument, sentence and the annotation's 'value' go into a template.
volition_templates = [
    VolitionTemplate(sent, pred, arg, val_dict['value'])
    for _gname, _edge, pred, arg, sent, val_dict in volition_examples
    if len(whitespace_pattern.split(sent)) <= max_len
]

change_of_state_templates = [
    ChangeOfStateTemplate(sent, pred, arg, val_dict['value'])
    for _gname, _edge, pred, arg, sent, val_dict in change_of_state_examples
    if len(whitespace_pattern.split(sent)) <= max_len
]

In [None]:
import numpy as np

# Seed NumPy's global RNG before any shuffling/sampling below.
# NOTE: every np.random call in this cell consumes the same global stream, so
# reordering the statements changes which templates are selected.
np.random.seed(12)

# In-place shuffles of the template lists built in the previous cell.
np.random.shuffle(volition_templates)
np.random.shuffle(change_of_state_templates)

# Subset to balance yes and no: split by the sign of the annotation value
# (templates with value == 0 would fall into neither group).
yes_volition_templates = [x for x in volition_templates if x.value > 0]
no_volition_templates = [x for x in volition_templates if x.value < 0]

yes_cos_templates = [x for x in change_of_state_templates if x.value > 0]
no_cos_templates = [x for x in change_of_state_templates if x.value < 0]

# Size of the smaller class per property; both classes are downsampled to it.
min_volition = min(len(yes_volition_templates), len(no_volition_templates))
min_cos = min(len(yes_cos_templates), len(no_cos_templates))

# Sample without replacement so no template appears twice within a class.
samp_yes_volition_templates = np.random.choice(yes_volition_templates, size=min_volition, replace=False).tolist()
samp_no_volition_templates = np.random.choice(no_volition_templates, size=min_volition, replace=False).tolist()
samp_yes_cos_templates = np.random.choice(yes_cos_templates, size=min_cos, replace=False).tolist()
samp_no_cos_templates = np.random.choice(no_cos_templates, size=min_cos, replace=False).tolist() 


# Balanced pools: all sampled "yes" templates followed by all sampled "no" templates.
balanced_volition_templates = samp_yes_volition_templates + samp_no_volition_templates
balanced_cos_templates = samp_yes_cos_templates + samp_no_cos_templates

In [None]:
import pdb 
import copy 

instr_str = "Answer this yes-no question about the following sentence.\n"


def make_prompts(templates, num_prompts, prefix_size=3, instructions=instr_str):
    """Build few-shot yes/no prompts from disjoint groups of templates.

    Each prompt is ``instructions`` followed by ``prefix_size`` answered
    templates and one final template whose answer is withheld. A template is
    used in at most one prompt.

    Args:
        templates: sequence of template objects exposing a numeric ``value``
            attribute and a ``__str__`` that omits the answer when ``value``
            is ``None``. The sequence and its items are left unmodified.
        num_prompts: upper bound on the number of prompts to build.
        prefix_size: number of answered examples placed before the query.
        instructions: text prepended to every prompt.

    Returns:
        A list of ``{"prompt": str, "correct_value": "Yes" | "No"}`` dicts of
        length ``min(num_prompts, len(templates) // (prefix_size + 1))``;
        empty when ``prefix_size`` is negative.
    """
    group_size = prefix_size + 1
    if group_size <= 0:
        # No query slot can be formed; avoids a division by zero for -1.
        return []

    # Shuffle a copy: shuffling `templates` itself would reorder the caller's
    # list as a side effect of every call.
    remaining = list(templates)
    np.random.shuffle(remaining)

    prompts = []
    max_num_prompts = min(num_prompts, len(remaining) // group_size)
    for _ in range(max_num_prompts):
        chosen_idxs = np.random.choice(len(remaining), size=group_size, replace=False).tolist()
        *shots, query = (remaining[idx] for idx in chosen_idxs)

        # Drop the used templates so later prompts cannot reuse them.
        taken = set(chosen_idxs)
        remaining = [t for idx, t in enumerate(remaining) if idx not in taken]

        correct_value = "Yes" if query.value > 0 else "No"
        # Blank the answer on a copy so the query renders without it while
        # the caller's template keeps its value.
        query = copy.deepcopy(query)
        query.value = None

        prompt = instructions + "\n".join(str(t) for t in [*shots, query])
        prompts.append({"prompt": prompt, "correct_value": correct_value})
    return prompts

In [None]:
import json 

# For each prefix size 0..3, build up to 40 prompts per property and write
# them as JSON. Volition prompts are generated before change-of-state prompts
# for each size, so the order of random draws is unchanged.
for s in range(4):
    volition_prompts = make_prompts(balanced_volition_templates, 40, prefix_size=s)
    cos_prompts = make_prompts(balanced_cos_templates, 40, prefix_size=s)

    outputs = (
        (f"../data/agent_patient/volition_prefix_{s}.json", volition_prompts),
        (f"../data/agent_patient/change_of_state_prefix_{s}.json", cos_prompts),
    )
    for out_path, prompt_list in outputs:
        with open(out_path, "w") as f1:
            json.dump(prompt_list, f1)

# Large models on SPRL
## GPT-3 

In [5]:
from agent_patient_experiment import AgentPatientExperiment
from api_tools import run_gpt_prompt
import time 
s = 0
gpt_kwargs = {"max_tokens": 20, "temperature": 0.0}

# Run GPT-3 on prompt set 1: all volition prefix sizes first, then all
# change_of_state prefix sizes. The second element is an additional fixed
# pause (seconds) applied after each change_of_state run only.
for task, extra_pause in (("volition", 0), ("change_of_state", 60)):
    for s in range(0, 4):
        prompt_path = f"../data/agent_patient/{task}_1_prefix_{s}.json"
        gpt_object_control_experiment = AgentPatientExperiment(
            "gpt-3", task, prompt_path, run_gpt_prompt, 1, gpt_kwargs
        )
        t0 = time.time()
        gpt_object_control_experiment.run(overwrite=False)
        gpt_df = gpt_object_control_experiment.format_results()
        t1 = time.time()
        gpt_df.to_csv(f"../agent_patient_results_1/gpt_{task}_prefix_{s}.csv")
        if extra_pause:
            time.sleep(extra_pause)
        # Always waits at least 10 s; longer when the run took under a minute.
        time.sleep(max(0, 60 - (t1 - t0)) + 10)


100%|██████████| 100/100 [02:02<00:00,  1.23s/it]
100%|██████████| 100/100 [01:59<00:00,  1.20s/it]
100%|██████████| 100/100 [02:01<00:00,  1.22s/it]
100%|██████████| 100/100 [02:01<00:00,  1.22s/it]
100%|██████████| 100/100 [02:04<00:00,  1.25s/it]
100%|██████████| 100/100 [02:05<00:00,  1.25s/it]
100%|██████████| 100/100 [02:07<00:00,  1.28s/it]
100%|██████████| 100/100 [02:06<00:00,  1.27s/it]


In [None]:
from agent_patient_experiment import AgentPatientExperiment
from api_tools import run_gpt_prompt
import time 
s = 0
gpt_kwargs = {"max_tokens": 20, "temperature": 0.0}

# Run GPT-3 on prompt set 2: all volition prefix sizes first, then all
# change_of_state prefix sizes.
# NOTE(review): the change_of_state runs used to be labelled "volition" here,
# unlike the prompt-set-1 cell; the label now matches the prompt file. Confirm
# that AgentPatientExperiment does not key previously cached results on the
# old label before relying on overwrite=False.
for task in ("volition", "change_of_state"):
    for s in range(0, 4):
        prompt_path = f"../data/agent_patient/{task}_2_prefix_{s}.json"
        gpt_object_control_experiment = AgentPatientExperiment(
            "gpt-3", task, prompt_path, run_gpt_prompt, 1, gpt_kwargs
        )
        # time.time must be *called*: storing the function object makes the
        # subtraction below raise TypeError.
        t0 = time.time()
        gpt_object_control_experiment.run(overwrite=False)
        gpt_df = gpt_object_control_experiment.format_results()
        t1 = time.time()
        elapsed = t1 - t0
        gpt_df.to_csv(f"../agent_patient_results_2/gpt_{task}_prefix_{s}.csv")
        # Wait out whatever is left of a 70 s window since the run started.
        time.sleep(max(0, 60 - elapsed + 10))


100%|██████████| 100/100 [02:15<00:00,  1.35s/it]
100%|██████████| 100/100 [02:10<00:00,  1.31s/it]
100%|██████████| 100/100 [02:19<00:00,  1.39s/it]
100%|██████████| 100/100 [02:25<00:00,  1.45s/it]
100%|██████████| 100/100 [02:15<00:00,  1.35s/it]
100%|██████████| 100/100 [02:12<00:00,  1.33s/it]
100%|██████████| 100/100 [02:04<00:00,  1.24s/it]
100%|██████████| 100/100 [02:05<00:00,  1.25s/it]


## Jurassic large

In [1]:
from agent_patient_experiment import AgentPatientExperiment
from api_tools import run_ai21_prompt
import time
jurassic_kwargs = {"max_tokens": 20, "temperature": 0.0}

# Disabled: volition runs for prompt set 1 (kept for reference).
# for s in range(0, 4):
#     jurassic_object_control_experiment  = AgentPatientExperiment("jurassic-large", "volition", f"../data/agent_patient/volition_1_prefix_{s}.json", run_ai21_prompt, 1, jurassic_kwargs)
#     t0 = time.time()
#     jurassic_object_control_experiment.run(overwrite=False)
#     jurassic_df = jurassic_object_control_experiment.format_results()
#     t1 = time.time()
#     elapsed = t1 - t0
#     jurassic_df.to_csv(f"../agent_patient_results_1/jurassic_volition_prefix_{s}.csv")
#     time.sleep(max(0, 60 - elapsed + 10))

# Change-of-state runs for prompt set 1; only prefix sizes 2 and 3 are run here.
for s in (2, 3):
    prompt_path = f"../data/agent_patient/change_of_state_1_prefix_{s}.json"
    jurassic_object_control_experiment = AgentPatientExperiment(
        "jurassic-large", "change_of_state", prompt_path, run_ai21_prompt, 1, jurassic_kwargs
    )
    t0 = time.time()
    jurassic_object_control_experiment.run(overwrite=False)
    jurassic_df = jurassic_object_control_experiment.format_results()
    t1 = time.time()
    elapsed = t1 - t0  # recorded but not used for the pause below
    jurassic_df.to_csv(f"../agent_patient_results_1/jurassic_change_of_state_prefix_{s}.csv")
    # Fixed one-minute pause between runs.
    time.sleep(60)


100%|██████████| 118/118 [02:22<00:00,  1.21s/it]
100%|██████████| 118/118 [02:22<00:00,  1.21s/it]


In [1]:
from agent_patient_experiment import AgentPatientExperiment
from api_tools import run_ai21_prompt
import time
jurassic_kwargs = {"max_tokens": 20, "temperature": 0.0}

# Disabled: volition runs for prompt set 2 (kept for reference).
# for s in range(0, 4):
#     jurassic_object_control_experiment  = AgentPatientExperiment("jurassic-large", "volition", f"../data/agent_patient/volition_2_prefix_{s}.json", run_ai21_prompt, 1, jurassic_kwargs)
#     t0 = time.time()
#     jurassic_object_control_experiment.run(overwrite=False)

#     jurassic_df = jurassic_object_control_experiment.format_results()
#     t1 = time.time()
#     elapsed = t1 - t0
#     jurassic_df.to_csv(f"../agent_patient_results_2/jurassic_volition_prefix_{s}.csv")
#     time.sleep(60)

# One-minute pause before the first request of this cell.
time.sleep(60)

# Change-of-state runs for prompt set 2; only prefix sizes 2 and 3 are run here.
# NOTE(review): these runs used to be labelled "volition", unlike the
# prompt-set-1 Jurassic-large cell; the label now matches the prompt file.
# Confirm that AgentPatientExperiment does not key previously cached results
# on the old label before relying on overwrite=False.
for s in range(2, 4):
    prompt_path = f"../data/agent_patient/change_of_state_2_prefix_{s}.json"
    jurassic_object_control_experiment = AgentPatientExperiment(
        "jurassic-large", "change_of_state", prompt_path, run_ai21_prompt, 1, jurassic_kwargs
    )
    t0 = time.time()
    jurassic_object_control_experiment.run(overwrite=False)

    jurassic_df = jurassic_object_control_experiment.format_results()
    t1 = time.time()
    elapsed = t1 - t0  # recorded but not used for the pause below
    jurassic_df.to_csv(f"../agent_patient_results_2/jurassic_change_of_state_prefix_{s}.csv")
    # Fixed one-minute pause between runs.
    time.sleep(60)


 47%|████▋     | 55/118 [00:22<00:22,  2.84it/s]

## Jurassic jumbo

In [None]:
from agent_patient_experiment import AgentPatientExperiment
from api_tools import run_ai21_jumbo_prompt
import time 
jurassic_kwargs = {"max_tokens": 20, "temperature": 0.0}

# Run Jurassic-jumbo on prompt set 1: all volition prefix sizes first, then
# all change_of_state prefix sizes.
# NOTE(review): the change_of_state runs used to be labelled "volition" here,
# unlike the GPT-3 and Jurassic-large prompt-set-1 cells; the label now
# matches the prompt file. Confirm that AgentPatientExperiment does not key
# previously cached results on the old label before relying on overwrite=False.
for task in ("volition", "change_of_state"):
    for s in range(0, 4):
        prompt_path = f"../data/agent_patient/{task}_1_prefix_{s}.json"
        jurassic_object_control_experiment = AgentPatientExperiment(
            "jurassic-jumbo", task, prompt_path, run_ai21_jumbo_prompt, 1, jurassic_kwargs
        )
        t0 = time.time()
        # Rate-limit settings are passed through to run(); their exact
        # semantics live in AgentPatientExperiment (not visible here).
        jurassic_object_control_experiment.run(overwrite=False, rate_limit_delay=60, rate_limit_count=19)
        jurassic_df = jurassic_object_control_experiment.format_results()
        t1 = time.time()
        elapsed = t1 - t0
        jurassic_df.to_csv(f"../agent_patient_results_1/jurassic_jumbo_{task}_prefix_{s}.csv")
        # Wait out whatever is left of a 70 s window since the run started.
        time.sleep(max(0, 60 - elapsed + 10))


100%|██████████| 100/100 [05:50<00:00,  3.50s/it]
100%|██████████| 100/100 [05:50<00:00,  3.50s/it]
100%|██████████| 100/100 [05:50<00:00,  3.50s/it]
100%|██████████| 100/100 [05:50<00:00,  3.50s/it]
100%|██████████| 100/100 [05:50<00:00,  3.50s/it]
100%|██████████| 100/100 [05:50<00:00,  3.50s/it]
100%|██████████| 100/100 [05:50<00:00,  3.50s/it]
100%|██████████| 100/100 [05:50<00:00,  3.50s/it]


In [None]:
from agent_patient_experiment import AgentPatientExperiment
from api_tools import run_ai21_jumbo_prompt
import time 
jurassic_kwargs = {"max_tokens": 20, "temperature": 0.0}

# Run Jurassic-jumbo on prompt set 2: all volition prefix sizes first, then
# all change_of_state prefix sizes.
# NOTE(review): the change_of_state runs used to be labelled "volition" here,
# unlike the GPT-3 and Jurassic-large prompt-set-1 cells; the label now
# matches the prompt file. Confirm that AgentPatientExperiment does not key
# previously cached results on the old label before relying on overwrite=False.
for task in ("volition", "change_of_state"):
    for s in range(0, 4):
        prompt_path = f"../data/agent_patient/{task}_2_prefix_{s}.json"
        jurassic_object_control_experiment = AgentPatientExperiment(
            "jurassic-jumbo", task, prompt_path, run_ai21_jumbo_prompt, 1, jurassic_kwargs
        )
        # time.time must be *called*: storing the function object makes the
        # subtraction below raise TypeError.
        t0 = time.time()
        # Rate-limit settings are passed through to run(); their exact
        # semantics live in AgentPatientExperiment (not visible here).
        jurassic_object_control_experiment.run(overwrite=False, rate_limit_delay=60, rate_limit_count=19)

        jurassic_df = jurassic_object_control_experiment.format_results()
        t1 = time.time()
        elapsed = t1 - t0
        jurassic_df.to_csv(f"../agent_patient_results_2/jurassic_jumbo_{task}_prefix_{s}.csv")
        # Wait out whatever is left of a 70 s window since the run started.
        time.sleep(max(0, 60 - elapsed + 10))


100%|██████████| 100/100 [05:50<00:00,  3.50s/it]
100%|██████████| 100/100 [05:50<00:00,  3.50s/it]
100%|██████████| 100/100 [05:50<00:00,  3.50s/it]
100%|██████████| 100/100 [05:50<00:00,  3.50s/it]
100%|██████████| 100/100 [05:50<00:00,  3.50s/it]
100%|██████████| 100/100 [05:50<00:00,  3.50s/it]
100%|██████████| 100/100 [05:50<00:00,  3.50s/it]
100%|██████████| 100/100 [05:50<00:00,  3.50s/it]
