In [1]:
import pandas as pd 
import json
import sys
import pathlib 

# Absolute form of the current working directory (an empty Path is ".").
path_to_notebook = pathlib.Path("").absolute()
# Prepend the grandparent and parent directories to the import search path in
# a single slice assignment; the resulting order is [grandparent, parent, ...].
sys.path[:0] = [
    str(path_to_notebook.parent.parent),
    str(path_to_notebook.parent),
]

from experiment import Experiment
from api_tools import (FixedGPTPrompt, 
                       FixedPassiveGPTPrompt, 
                       FixedT5Prompt, 
                       FixedPassiveT5Prompt, 
                       run_ai21_prompt, 
                       run_ai21_jumbo_prompt,
                       run_gpt_prompt, 
                       run_t5_prompt)

from metrics import accuracy_report


# Object Control
Starting with the object control experiments, we will look at how 4 LMs do:
1. GPT3
2. T5 for QA 
3. Jurassic large 
4. Jurassic Jumbo

## Experimental settings
We use several name pairs to control for gender: 2 male-female pairs, 1 male-male pair, 1 female-female pair, and 1 neutral-neutral pair. 
We also vary over 10 object control verbs and 5 different actions. 

In [2]:
# Load the experiment inputs. Each file is opened in a `with` block so its
# handle is closed as soon as the JSON is parsed, instead of staying open for
# the lifetime of the kernel.
with open("../../data/names_top_2.json") as names_file:
    names = json.load(names_file)
verbs = ["told", "ordered", "called upon", "reminded", "urged", "asked", "persuaded", "convinced", "forced", "pushed"]
with open("../../data/verbs.json") as actions_file:
    actions = json.load(actions_file)
# Notebook-global index passed as the second argument to Experiment.run by the
# active object-control cells below.
correct_index = 1
with open("../../data/nicknames.json") as nicknames_file:
    nicknames = json.load(nicknames_file)

## GPT 3
 

In [5]:

# GPT-3, active object control: build the experiment, run it over the loaded
# names/verbs/actions, then write the formatted results to CSV.
gpt_kwargs = dict(max_tokens=2, temperature=0.0)
gpt_object_control_experiment = Experiment(
    "gpt3",
    "object-control",
    FixedGPTPrompt,
    run_gpt_prompt,
    1,
    gpt_kwargs,
)
gpt_object_control_experiment.run(
    names,
    correct_index,
    verbs,
    actions,
    nicknames=nicknames,
    do_swap=False,
)

gpt_df = gpt_object_control_experiment.format_results()
gpt_df.to_csv("../../short_instructions/results/gpt_object_control.csv")


100%|██████████| 30/30 [30:49<00:00, 61.66s/it]


## Jurassic Large

In [6]:

# Jurassic large, active object control: same setup as the GPT-3 cell but
# using the AI21 runner and its `maxTokens` keyword spelling.
jurassic_kwargs = dict(maxTokens=2, temperature=0.0)
jurassic_object_control_experiment = Experiment(
    "jurassic-large",
    "object-control",
    FixedGPTPrompt,
    run_ai21_prompt,
    1,
    jurassic_kwargs,
)

jurassic_object_control_experiment.run(
    names,
    correct_index,
    verbs,
    actions,
    nicknames=nicknames,
    do_swap=False,
)

# Persist the formatted results.
jurassic_df = jurassic_object_control_experiment.format_results()
jurassic_df.to_csv("../../short_instructions/results/jurassic_object_control.csv")



100%|██████████| 30/30 [30:42<00:00, 61.42s/it]


## Jurassic jumbo

In [4]:

# Jurassic jumbo, active object control. The run is throttled through the
# rate_limit_count / rate_limit_delay keywords of Experiment.run.
jurassic_kwargs = dict(maxTokens=2, temperature=0.0)
jurassic_jumbo_object_control_experiment = Experiment(
    "jurassic-jumbo",
    "object-control",
    FixedGPTPrompt,
    run_ai21_jumbo_prompt,
    1,
    jurassic_kwargs,
)

jurassic_jumbo_object_control_experiment.run(
    names,
    correct_index,
    verbs,
    actions,
    nicknames=nicknames,
    do_swap=False,
    rate_limit_count=19,
    rate_limit_delay=60,
)

# Persist the formatted results.
jurassic_jumbo_df = jurassic_jumbo_object_control_experiment.format_results()
jurassic_jumbo_df.to_csv("../../short_instructions/results/jurassic_jumbo_object_control.csv")



100%|██████████| 30/30 [1:27:30<00:00, 175.01s/it]


# Passives 

The passive form reverses both the linear order and the syntactic roles of the agent and patient. For example:

- Mary told Tom to wipe the counter 
- Tom was told by Mary to wipe the counter 

Now Tom is linearly farther away from "wipe" and is also the grammatical subject of the matrix clause (though still the patient).

In [None]:
# Rebind the notebook-global `correct_index` to 0 for the passive experiments.
# Every later cell that passes `correct_index` to Experiment.run sees this
# value until another cell reassigns it, so cell execution order matters.
# NOTE(review): presumably 0 is the position of the correct name in passive
# prompts, where the name order is reversed -- confirm against Experiment.run.
correct_index = 0 

## GPT3 Passive

In [10]:

# GPT-3, passive object control.
# The index this cell declares is now the one passed to run(). Previously
# `correct_idx` was assigned but never used, and the notebook-global
# `correct_index` was passed instead, so the result depended on which cells
# had executed earlier.
correct_idx = 0
gpt_kwargs = {"max_tokens": 2, "temperature": 0.0}
passive_gpt_object_control_experiment  = Experiment("gpt3", "object-control-passive", FixedPassiveGPTPrompt, run_gpt_prompt, 1, gpt_kwargs) 

passive_gpt_object_control_experiment.run(names, correct_idx, verbs, actions, nicknames=nicknames, do_swap=False)

# Persist the formatted results.
passive_gpt_df = passive_gpt_object_control_experiment.format_results()

passive_gpt_df.to_csv("../../short_instructions/results/gpt_passive_object_control.csv")

100%|██████████| 30/30 [30:39<00:00, 61.31s/it]


## Jurassic Large Passive

In [11]:

# Jurassic large, passive object control.
# Pass the index defined in this cell; the original assigned `correct_idx`
# but handed the notebook-global `correct_index` to run(), making the cell
# depend on hidden kernel state.
correct_idx = 0
jurassic_kwargs = {"maxTokens": 2, "temperature": 0.0}
passive_jurassic_object_control_experiment  = Experiment("jurassic-large", "object-control-passive", FixedPassiveGPTPrompt, run_ai21_prompt, 1, jurassic_kwargs)

passive_jurassic_object_control_experiment.run(names, correct_idx, verbs, actions, nicknames=nicknames, do_swap=False)
passive_jurassic_df = passive_jurassic_object_control_experiment.format_results()

# Persist the formatted results.
passive_jurassic_df.to_csv("../../short_instructions/results/jurassic_passive_object_control.csv")

100%|██████████| 30/30 [30:47<00:00, 61.59s/it]


## Jurassic jumbo passive

In [14]:

# Jurassic jumbo, passive object control (rate-limited run).
# Pass the index defined in this cell; the original assigned `correct_idx`
# but handed the notebook-global `correct_index` to run(), making the cell
# depend on hidden kernel state.
correct_idx = 0
jurassic_kwargs = {"maxTokens": 2, "temperature": 0.0}
passive_jurassic_object_control_experiment  = Experiment("jurassic-jumbo", "object-control-passive", FixedPassiveGPTPrompt, run_ai21_jumbo_prompt, 1, jurassic_kwargs)

passive_jurassic_object_control_experiment.run(names, correct_idx, verbs, actions, nicknames=nicknames, do_swap=False,
                                                 rate_limit_count=19, rate_limit_delay=60)
passive_jurassic_df = passive_jurassic_object_control_experiment.format_results()

# Persist the formatted results.
passive_jurassic_df.to_csv("../../short_instructions/results/jurassic_jumbo_passive_object_control.csv")

100%|██████████| 30/30 [1:27:30<00:00, 175.00s/it]


# Prompt hacking

## Just agent/patient

## GPT3

In [7]:

# GPT-3, active object control, prompting for the agent only.
# Pass the index defined in this cell; the original assigned `correct_idx`
# but handed the notebook-global `correct_index` to run().
correct_idx = 0
gpt_kwargs = {"max_tokens": 2, "temperature": 0.0}
gpt_object_control_experiment  = Experiment("gpt3", "object-control", FixedGPTPrompt, run_gpt_prompt, 1, gpt_kwargs)
# recover() is given the CSV written by an earlier run of this cell.
# NOTE(review): presumably it reloads those results so the run can resume --
# confirm against Experiment.recover.
gpt_object_control_experiment.recover("../../short_instructions/results_just_prompt_agent/gpt_object_control.csv")
gpt_object_control_experiment.run(names, correct_idx, verbs, actions, nicknames=nicknames, do_swap=False, just_prompt_agent=True)

# Persist the formatted results back to the same CSV.
gpt_df = gpt_object_control_experiment.format_results()
gpt_df.to_csv("../../short_instructions/results_just_prompt_agent/gpt_object_control.csv")


100%|██████████| 30/30 [03:49<00:00,  7.65s/it] 


In [6]:
# Re-format and re-save the agent-only GPT-3 results. These are the same two
# statements that end the preceding code cell, so on a top-to-bottom run this
# cell rewrites the same CSV with the same experiment object.
gpt_df = gpt_object_control_experiment.format_results()
gpt_df.to_csv("../../short_instructions/results_just_prompt_agent/gpt_object_control.csv")

In [8]:

# GPT-3, active object control, prompting for the patient only.
# Pass the index defined in this cell. The original assigned `correct_idx = 1`
# but passed the notebook-global `correct_index`, which is 0 at this point on
# a top-to-bottom run (set by the "Passives" cell), so the declared index was
# silently ignored.
correct_idx = 1
gpt_kwargs = {"max_tokens": 2, "temperature": 0.0}
gpt_object_control_experiment  = Experiment("gpt3", "object-control", FixedGPTPrompt, run_gpt_prompt, 1, gpt_kwargs)
gpt_object_control_experiment.run(names, correct_idx, verbs, actions, nicknames=nicknames, do_swap=False, just_prompt_patient=True)

# Persist the formatted results.
gpt_df = gpt_object_control_experiment.format_results()
gpt_df.to_csv("../../short_instructions/results_just_prompt_patient/gpt_object_control.csv")


100%|██████████| 30/30 [30:38<00:00, 61.29s/it]


## Fully hacked

## GPT-3

In [9]:

# GPT-3, active object control with prompt hacking.
# Pass the index defined in this cell. The original assigned `correct_idx = 1`
# but passed the notebook-global `correct_index`, which is 0 at this point on
# a top-to-bottom run, so the declared index was silently ignored.
correct_idx = 1
gpt_kwargs = {"max_tokens": 2, "temperature": 0.0}
gpt_object_control_experiment  = Experiment("gpt3", "object-control", FixedGPTPrompt, run_gpt_prompt, 1, gpt_kwargs)
gpt_object_control_experiment.run(names, correct_idx, verbs, actions, nicknames=nicknames, do_swap=False, prompt_hacking=True)

# Persist the formatted results.
gpt_df = gpt_object_control_experiment.format_results()
gpt_df.to_csv("../../short_instructions/results_hacked/gpt_object_control.csv")


100%|██████████| 30/30 [30:40<00:00, 61.36s/it]


## GPT-3 passive

In [16]:

# GPT-3, passive object control with prompt hacking.
# Pass the index defined in this cell; the original assigned `correct_idx`
# but handed the notebook-global `correct_index` to run(), making the cell
# depend on hidden kernel state.
correct_idx = 0
gpt_kwargs = {"max_tokens": 2, "temperature": 0.0}
gpt_object_control_experiment  = Experiment("gpt3", "object-control-passive", FixedPassiveGPTPrompt, run_gpt_prompt, 1, gpt_kwargs)
gpt_object_control_experiment.run(names, correct_idx, verbs, actions, nicknames=nicknames, do_swap=False, prompt_hacking=True)

# Persist the formatted results.
gpt_df = gpt_object_control_experiment.format_results()
gpt_df.to_csv("../../short_instructions/results_hacked/gpt_passive_object_control.csv")


100%|██████████| 30/30 [30:40<00:00, 61.34s/it]


## Jurassic large

In [3]:

# Jurassic large, active object control with prompt hacking.
# Rebinds the notebook-global `correct_index`; later cells that pass
# `correct_index` to run() see this value.
correct_index = 1
jurassic_kwargs = dict(maxTokens=2, temperature=0.0)
jurassic_object_control_experiment = Experiment(
    "jurassic-large",
    "object-control",
    FixedGPTPrompt,
    run_ai21_prompt,
    1,
    jurassic_kwargs,
)
jurassic_object_control_experiment.run(
    names,
    correct_index,
    verbs,
    actions,
    nicknames=nicknames,
    do_swap=False,
    prompt_hacking=True,
    rate_limit_delay=60,
    rate_limit_count=30,
)
# Persist the formatted results.
jurassic_df = jurassic_object_control_experiment.format_results()
jurassic_df.to_csv("../../short_instructions/results_hacked/jurassic_object_control.csv")

100%|██████████| 30/30 [56:05<00:00, 112.19s/it]


## Jurassic large passive

In [12]:

# Jurassic large, passive object control with prompt hacking.
# Pass the index defined in this cell. The original assigned `correct_idx = 0`
# but passed the notebook-global `correct_index`, which the preceding
# "Jurassic large" cell sets to 1, so the declared index was silently ignored.
correct_idx = 0
jurassic_kwargs = {"maxTokens": 2, "temperature": 0.0}
passive_jurassic_object_control_experiment  = Experiment("jurassic-large", "object-control-passive", FixedPassiveGPTPrompt, run_ai21_prompt, 1, jurassic_kwargs)
# recover() is given the CSV written by an earlier run of this cell.
# NOTE(review): presumably it reloads those results so the run can resume --
# confirm against Experiment.recover.
passive_jurassic_object_control_experiment.recover("../../short_instructions/results_hacked/jurassic_passive_object_control.csv")
passive_jurassic_object_control_experiment.run(names, correct_idx, verbs, actions, nicknames=nicknames, do_swap=False, prompt_hacking=True)
# Persist the formatted results back to the same CSV.
passive_jurassic_df = passive_jurassic_object_control_experiment.format_results()
passive_jurassic_df.to_csv("../../short_instructions/results_hacked/jurassic_passive_object_control.csv")

100%|██████████| 30/30 [25:57<00:00, 51.91s/it]


## Jurassic jumbo

In [3]:

# Jurassic jumbo, active object control with prompt hacking (resumed run).
# The assignment target below was previously split across two lines
# ("..._exper" / "iment = ..."), which left the experiment variable undefined.
# This cell relies on the notebook-global `correct_index` set by an earlier cell.
jurassic_kwargs = {"maxTokens": 2, "temperature": 0.0}
jurassic_jumbo_object_control_experiment  = Experiment("jurassic-jumbo", "object-control", FixedGPTPrompt, run_ai21_jumbo_prompt, 1, jurassic_kwargs)
# recover() is given the CSV from an earlier run and run() is called with
# overwrite=False.
# NOTE(review): presumably this skips already-recovered items -- confirm
# against Experiment.recover / Experiment.run.
jurassic_jumbo_object_control_experiment.recover("../../short_instructions/results_hacked/jurassic_jumbo_object_control.csv")
jurassic_jumbo_object_control_experiment.run(names, correct_index, verbs, actions, nicknames=nicknames, do_swap=False, prompt_hacking=True, 
                                            overwrite=False,
                                            rate_limit_count=19, rate_limit_delay=60)



100%|██████████| 30/30 [04:44<00:00,  9.48s/it] 


In [4]:
# Format the results of the resumed Jurassic jumbo prompt-hacked run and write
# them to the same CSV path that the run cell passed to recover().
jurassic_jumbo_df = jurassic_jumbo_object_control_experiment.format_results()

jurassic_jumbo_df.to_csv("../../short_instructions/results_hacked/jurassic_jumbo_object_control.csv")



## Jurassic jumbo passive 

In [5]:
# Jurassic jumbo, passive object control with prompt hacking.
import time
# Pause for 60 seconds before issuing requests.
# NOTE(review): presumably this lets the API rate-limit window from the
# previous cell reset -- confirm.
time.sleep(60)

# Pass the index defined in this cell. The original assigned `correct_idx = 0`
# but passed the notebook-global `correct_index`, which is 1 at this point on
# a top-to-bottom run, so the declared index was silently ignored.
correct_idx = 0
jurassic_kwargs = {"maxTokens": 2, "temperature": 0.0}
passive_jurassic_object_control_experiment  = Experiment("jurassic-jumbo", "object-control-passive", FixedPassiveGPTPrompt, run_ai21_jumbo_prompt, 1, jurassic_kwargs)

passive_jurassic_object_control_experiment.run(names, correct_idx, verbs, actions, nicknames=nicknames, do_swap=False, 
                                                prompt_hacking=True, rate_limit_count=19, rate_limit_delay=60)
passive_jurassic_df = passive_jurassic_object_control_experiment.format_results()

# Persist the formatted results.
passive_jurassic_df.to_csv("../../short_instructions/results_hacked/jurassic_jumbo_passive_object_control.csv")

100%|██████████| 30/30 [1:27:30<00:00, 175.01s/it]


# Now with instructions

## GPT-3 object control
- already done

## GPT-3 passive object control
- already done

## GPT-3 prompt-hacked object control 

In [15]:

# GPT-3, active object control with prompt hacking and the long instruction.
# Pass the index defined in this cell; the original assigned `correct_idx`
# but handed the notebook-global `correct_index` to run(), making the cell
# depend on hidden kernel state.
correct_idx = 1
gpt_kwargs = {"max_tokens": 2, "temperature": 0.0}
gpt_object_control_experiment  = Experiment("gpt3", "object-control", FixedGPTPrompt, run_gpt_prompt, 1, gpt_kwargs)
gpt_object_control_experiment.run(names, correct_idx, verbs, actions, nicknames=nicknames, do_swap=False, prompt_hacking=True, long_instruction=True)

# Persist the formatted results under with_instructions/.
gpt_df = gpt_object_control_experiment.format_results()
gpt_df.to_csv("../../with_instructions/results_hacked/gpt_object_control.csv")


100%|██████████| 30/30 [30:39<00:00, 61.33s/it]


## GPT-3 passive prompt-hacked object control 

In [17]:

# GPT-3, passive object control with prompt hacking and the long instruction.
# Pass the index defined in this cell. The original assigned `correct_idx = 0`
# but passed the notebook-global `correct_index`, which is 1 at this point on
# a top-to-bottom run, so the declared index was silently ignored.
correct_idx = 0
gpt_kwargs = {"max_tokens": 2, "temperature": 0.0}
gpt_object_control_experiment  = Experiment("gpt3", "object-control-passive", FixedPassiveGPTPrompt, run_gpt_prompt, 1, gpt_kwargs)
gpt_object_control_experiment.run(names, correct_idx, verbs, actions, nicknames=nicknames, do_swap=False, prompt_hacking=True, long_instruction=True)

# Persist the formatted results under with_instructions/.
gpt_df = gpt_object_control_experiment.format_results()
gpt_df.to_csv("../../with_instructions/results_hacked/gpt_passive_object_control.csv")


100%|██████████| 30/30 [30:38<00:00, 61.29s/it]


## Jurassic large

In [8]:

# Jurassic large, active object control with prompt hacking and the long
# instruction. Uses the notebook-global `correct_index` set by an earlier cell.
jurassic_kwargs = dict(maxTokens=2, temperature=0.0)
jurassic_object_control_experiment = Experiment(
    "jurassic-large",
    "object-control",
    FixedGPTPrompt,
    run_ai21_prompt,
    1,
    jurassic_kwargs,
)

jurassic_object_control_experiment.run(
    names,
    correct_index,
    verbs,
    actions,
    nicknames=nicknames,
    rate_limit_count=58,
    rate_limit_delay=60,
    do_swap=False,
    prompt_hacking=True,
    long_instruction=True,
)

# Persist the formatted results under with_instructions/.
jurassic_df = jurassic_object_control_experiment.format_results()
jurassic_df.to_csv("../../with_instructions/results_hacked/jurassic_object_control.csv")



100%|██████████| 30/30 [29:22<00:00, 58.76s/it]


## Jurassic large passive

In [3]:

# Jurassic large, passive object control with prompt hacking and the long
# instruction. Three fixes relative to the original cell:
#   1. Pass the index defined here (`correct_idx`) instead of the
#      notebook-global `correct_index`, which is 1 at this point on a
#      top-to-bottom run.
#   2. Use `run_ai21_prompt`, the runner the other "jurassic-large" cells use;
#      the original passed `run_ai21_jumbo_prompt`, so results labelled
#      "jurassic-large" came from the jumbo runner.
#   3. Pass `long_instruction=True`, as every other cell in the
#      "with instructions" section does; the results are written under
#      with_instructions/.
correct_idx = 0
jurassic_kwargs = {"maxTokens": 2, "temperature": 0.0}
passive_jurassic_object_control_experiment  = Experiment("jurassic-large", "object-control-passive", FixedPassiveGPTPrompt, run_ai21_prompt, 1, jurassic_kwargs)

passive_jurassic_object_control_experiment.run(names, correct_idx, verbs, actions, nicknames=nicknames, do_swap=False, long_instruction=True,
                                                prompt_hacking=True, rate_limit_count=19, rate_limit_delay=60)
passive_jurassic_df = passive_jurassic_object_control_experiment.format_results()

# Persist the formatted results under with_instructions/.
passive_jurassic_df.to_csv("../../with_instructions/results_hacked/jurassic_passive_object_control.csv")

100%|██████████| 30/30 [1:27:30<00:00, 175.01s/it]


## Jurassic jumbo passive 

In [4]:

# Jurassic jumbo, passive object control with prompt hacking and the long
# instruction.
# Pass the index defined in this cell. The original assigned `correct_idx = 0`
# but passed the notebook-global `correct_index`, which is 1 at this point on
# a top-to-bottom run, so the declared index was silently ignored.
correct_idx = 0
jurassic_kwargs = {"maxTokens": 2, "temperature": 0.0}
passive_jurassic_object_control_experiment  = Experiment("jurassic-jumbo", "object-control-passive", FixedPassiveGPTPrompt, run_ai21_jumbo_prompt, 1, jurassic_kwargs)

passive_jurassic_object_control_experiment.run(names, correct_idx, verbs, actions, nicknames=nicknames, do_swap=False, long_instruction=True,
                                                prompt_hacking=True, rate_limit_count=19, rate_limit_delay=60)
passive_jurassic_df = passive_jurassic_object_control_experiment.format_results()

# Persist the formatted results under with_instructions/.
passive_jurassic_df.to_csv("../../with_instructions/results_hacked/jurassic_jumbo_passive_object_control.csv")

100%|██████████| 30/30 [1:27:30<00:00, 175.01s/it]


## Jurassic jumbo active

In [6]:

# Jurassic jumbo, active object control with prompt hacking and the long
# instruction. Uses the notebook-global `correct_index` set by an earlier cell.
jurassic_kwargs = dict(maxTokens=2, temperature=0.0)
jurassic_jumbo_object_control_experiment = Experiment(
    "jurassic-jumbo",
    "object-control",
    FixedGPTPrompt,
    run_ai21_jumbo_prompt,
    1,
    jurassic_kwargs,
)

jurassic_jumbo_object_control_experiment.run(
    names,
    correct_index,
    verbs,
    actions,
    nicknames=nicknames,
    do_swap=False,
    long_instruction=True,
    prompt_hacking=True,
    rate_limit_count=19,
    rate_limit_delay=60,
)

# Persist the formatted results under with_instructions/.
jurassic_jumbo_df = jurassic_jumbo_object_control_experiment.format_results()
jurassic_jumbo_df.to_csv("../../with_instructions/results_hacked/jurassic_jumbo_object_control.csv")



  0%|          | 0/30 [00:00<?, ?it/s]