In [2]:
import json
import pandas as pd 

from experiment import Experiment
from api_tools import (FixedGPTPrompt, 
                       FixedPassiveGPTPrompt, 
                       FixedT5Prompt, 
                       FixedPassiveT5Prompt, 
                       run_ai21_prompt, 
                       run_ai21_jumbo_prompt, 
                       run_gpt_prompt, 
                       run_t5_prompt)

from metrics import accuracy_report


# Object Control
Starting with the object control experiments, we will look at how 6 LMs do:
1. GPT3
2. GPT-Neo 2.7B
3. T5 for QA
4. T0
5. Jurassic Large
6. Jurassic Jumbo

## Experimental settings
We're looking at different name pairs to control for gender. We have 2 male-female name pairs, 1 male-male pair, 1 female-female pair, and 1 neutral-neutral. 
We're also looking at 10 object control verbs and 5 different actions. 

In [3]:
def _load_json(path):
    """Parse the JSON file at ``path``, closing the file handle before returning."""
    with open(path) as handle:
        return json.load(handle)


# Names handed to every Experiment.run call below.
names = _load_json("../data/names_top_2.json")
# The ten object-control matrix verbs under test.
verbs = ["told", "ordered", "called upon", "reminded", "urged", "asked", "persuaded", "convinced", "forced", "pushed"]
# Earlier inline action list, superseded by the JSON file loaded on the next line:
# actions = [("to leave", "left"), ("to call home", "called home"), ("to reply", "replied"), ("to wipe the counter", "wiped the counter"), ("to dance", "danced")]
# Note: the actions are stored in verbs.json, not to be confused with `verbs` above.
actions = _load_json("../data/verbs.json")
# Passed as the `correct_index` argument of Experiment.run for the active-voice
# runs; the passive section of this notebook rebinds it to 0.
correct_index = 1
# Passed as `nicknames=` to Experiment.run / recompute.
nicknames = _load_json("../data/nicknames.json")

## GPT 3
 

In [None]:



# GPT-3 object-control run, currently disabled: every statement in this cell is
# commented out, so executing the cell defines nothing (in particular no `gpt_df`).
# The disabled code builds an Experiment around run_gpt_prompt, calls recover() on
# a results CSV (presumably to restore earlier results -- confirm in Experiment),
# optionally re-runs / recomputes, and writes the formatted results back to that CSV.
# gpt_kwargs = {"max_tokens": 2, "temperature": 0.0}
# gpt_object_control_experiment  = Experiment("gpt3", "object-control", FixedGPTPrompt, run_gpt_prompt, 1, gpt_kwargs)
# gpt_object_control_experiment.recover("/Users/Elias/child-lm/results/gpt_object_control_swap_names.csv")


# print(gpt_object_control_experiment.results[0:10])
# print(gpt_object_control_experiment.results[-10:])
# gpt_object_control_experiment.run(names, correct_index, verbs, actions, rate_limit=False, nicknames=nicknames)
# gpt_object_control_experiment.recompute(nicknames)
# gpt_df = gpt_object_control_experiment.format_results()
# gpt_df.to_csv("/Users/Elias/child-lm/results/gpt_object_control_swap_names.csv")


In [None]:
# No live code in this notebook assigns `gpt_df` (the GPT-3 run is commented out),
# so reload the saved GPT-3 results before reporting; otherwise this cell raises
# NameError on a fresh kernel. The file name is the one the disabled GPT-3 code
# writes; the repo-relative location assumes the notebook runs from a sibling
# directory of results/, like the other ../results and ../data paths used here.
gpt_df = pd.read_csv("../results/gpt_object_control_swap_names.csv")
accuracy_report(gpt_df)

## GPT-Neo 2.7B

In [None]:
import os

# Export the cache location BEFORE importing hf_tools.hf: transformers resolves
# TRANSFORMERS_CACHE when it is first imported, so setting it afterwards has no
# effect (hf_tools.hf presumably imports transformers -- confirm).
os.environ['TRANSFORMERS_CACHE'] = "/brtx/601-nvme1/estengel/.cache"

from hf_tools.hf import HuggingfaceRunFxn

# Local GPT-Neo 2.7B checkpoint, passed to Experiment in the same slot the API
# runners (e.g. run_gpt_prompt) occupy elsewhere in this notebook.
wrapper_fxn = HuggingfaceRunFxn("EleutherAI/gpt-neo-2.7B", device="cuda:1", constrained=False)

gptneo_object_control_experiment = Experiment("gpt-neo-2.7b", "object-control", FixedGPTPrompt, wrapper_fxn, 1, None)

# rate_limit_delay=None: no API rate limit applies to a local model (presumed meaning).
gptneo_object_control_experiment.run(names, correct_index, verbs, actions, do_swap=False, nicknames=nicknames, rate_limit_delay=None, overwrite=True)

gptneo_df = gptneo_object_control_experiment.format_results()

gptneo_df.to_csv("../results/gpt_neo_2.7b_object_control.csv")

# Last expression of the cell: displays the accuracy report.
accuracy_report(gptneo_df)


## T5 for QA

In [None]:
from hf_tools.hf import HuggingfaceRunFxn

# Runner for the T5 QA checkpoint; it is given to Experiment in the position
# where the API-backed cells pass a run_*_prompt function.
wrapper_fxn = HuggingfaceRunFxn(
    "valhalla/t5-base-qa-qg-hl",
    "cuda:0",
)

t5_object_control_experiment = Experiment(
    "t5",
    "object-control",
    FixedT5Prompt,
    wrapper_fxn,
    1,
    None,
)

# Active-voice object-control run over the shared names / verbs / actions.
t5_object_control_experiment.run(
    names,
    correct_index,
    verbs,
    actions,
    do_swap=False,
    nicknames=nicknames,
    rate_limit_delay=None,
    overwrite=True,
)

In [None]:
# Format the results of the T5 run from the previous cell and save them as CSV.
t5_df = t5_object_control_experiment.format_results()

t5_df.to_csv("../results/t5_qa_object_control.csv")

# Optional reload of previously saved results instead of re-running the model.
# NOTE(review): this path (t5_object_control.csv, absolute) is not the file
# written above (t5_qa_object_control.csv) -- confirm which one is intended.
# t5_df = pd.read_csv("/Users/Elias/child-lm/results/t5_object_control.csv")

In [None]:
# Display the accuracy report for the T5 object-control results in `t5_df`.
accuracy_report(t5_df)

## T0 

In [None]:
import os

# Export the cache location BEFORE importing hf_tools.hf: transformers resolves
# TRANSFORMERS_CACHE when it is first imported, so setting it afterwards has no
# effect (hf_tools.hf presumably imports transformers -- confirm).
os.environ['TRANSFORMERS_CACHE'] = "/brtx/601-nvme1/estengel/.cache"

from hf_tools.hf import HuggingfaceRunFxn

# T0pp is run on CPU here, unlike the CUDA devices used for the other local models.
wrapper_fxn = HuggingfaceRunFxn("bigscience/T0pp", device="cpu", constrained=False)

# Uses the same prompt class (FixedGPTPrompt) as the GPT-style models.
t0_object_control_experiment = Experiment("t0", "object-control", FixedGPTPrompt, wrapper_fxn, 1, None)

t0_object_control_experiment.run(names, correct_index, verbs, actions, do_swap=False, nicknames=nicknames, rate_limit_delay=None, overwrite=True)

t0_df = t0_object_control_experiment.format_results()

t0_df.to_csv("../results/t0_object_control.csv")

# Last expression of the cell: displays the accuracy report.
accuracy_report(t0_df)


## Jurassic Large

In [None]:

# Jurassic Large object-control run. The API run is kept commented out; the cell
# reloads the results it saved instead. Paths are repo-relative (../results), like
# the other result files in this notebook, rather than tied to one user's home dir.
# jurassic_kwargs = {"maxTokens": 2, "temperature": 0.0}
# jurassic_object_control_experiment  = Experiment("jurassic-large", "object-control", FixedGPTPrompt, run_ai21_prompt, 1, jurassic_kwargs)

# jurassic_object_control_experiment.run(names, correct_index, verbs, actions, nicknames=nicknames)

# jurassic_df = jurassic_object_control_experiment.format_results()

# jurassic_df.to_csv("../results/jurassic_object_control_swap_names.csv")

jurassic_df = pd.read_csv("../results/jurassic_object_control_swap_names.csv")

In [None]:
# Display the accuracy report for the Jurassic Large results currently bound to `jurassic_df`.
accuracy_report(jurassic_df)

In [None]:

# Jurassic Large object-control run with qa_pair=("Q","A").
jurassic_kwargs = {"maxTokens": 2, "temperature": 0.0}
jurassic_object_control_experiment = Experiment("jurassic-large", "object-control", FixedGPTPrompt, run_ai21_prompt, 1, jurassic_kwargs)
# NOTE(review): unlike most other runs in this notebook, no `nicknames=` is
# passed here -- confirm that is intentional.
jurassic_object_control_experiment.run(names, correct_index, verbs, actions, qa_pair=("Q","A"))
# This rebinds `jurassic_df`, replacing the non-QA results loaded earlier.
jurassic_df = jurassic_object_control_experiment.format_results()
# Saved under the repo-relative results directory used by the other cells,
# instead of an absolute path that only exists on one machine.
jurassic_df.to_csv("../results/jurassic_object_control_qa_swap_names.csv")

# jurassic_df = pd.read_csv("../results/jurassic_object_control_qa_swap_names.csv")

In [None]:
# Display the accuracy report for `jurassic_df`, which at this point holds the
# results of the run made with qa_pair=("Q","A").
accuracy_report(jurassic_df)

## Jurassic Jumbo

In [3]:

# Jurassic Jumbo object-control run (active voice).
jurassic_kwargs = {"maxTokens": 2, "temperature": 0.0}
jurassic_jumbo_object_control_experiment = Experiment("jurassic-jumbo", "object-control", FixedGPTPrompt, run_ai21_jumbo_prompt, 1, jurassic_kwargs)
# rate_limit_delay / rate_limit_count throttle the API calls (presumably a 60 s
# pause every 19 requests -- confirm in Experiment.run).
# NOTE(review): no `nicknames=` and no `qa_pair=` are passed here, although the
# output file name below contains "qa" -- confirm both are intentional.
jurassic_jumbo_object_control_experiment.run(names, correct_index, verbs, actions, rate_limit_delay=60, rate_limit_count=19)
jurassic_jumbo_df = jurassic_jumbo_object_control_experiment.format_results()
# Saved under the repo-relative results directory used by the other cells,
# instead of an absolute path that only exists on one machine.
jurassic_jumbo_df.to_csv("../results/jurassic_jumbo_object_control_qa_swap_names.csv")

# Reload of the file written above (the earlier commented line named the
# Jurassic Large variable and file by mistake):
# jurassic_jumbo_df = pd.read_csv("../results/jurassic_jumbo_object_control_qa_swap_names.csv")

100%|██████████| 30/30 [2:55:00<00:00, 350.02s/it]  


In [4]:
# Display the accuracy report for the Jurassic Jumbo run; the recorded output is
# a dict with 'total', 'acc_by_name' and 'acc_by_action' entries.
accuracy_report(jurassic_jumbo_df)

{'total': (0.473, 3000, 0.473, 3000),
 'acc_by_name': {'Nicole,Stephanie': (0.5, 200, 0.5, 200),
  'Nicole,Joseph': (0.51, 200, 0.51, 200),
  'Nicole,William': (0.5, 200, 0.5, 200),
  'Nicole,Casey': (0.415, 200, 0.415, 200),
  'Nicole,Avery': (0.5, 200, 0.5, 200),
  'Stephanie,Joseph': (0.51, 200, 0.51, 200),
  'Stephanie,William': (0.485, 200, 0.485, 200),
  'Stephanie,Casey': (0.445, 200, 0.445, 200),
  'Stephanie,Avery': (0.465, 200, 0.465, 200),
  'Joseph,William': (0.42, 200, 0.42, 200),
  'Joseph,Casey': (0.505, 200, 0.505, 200),
  'Joseph,Avery': (0.49, 200, 0.49, 200),
  'William,Casey': (0.44, 200, 0.44, 200),
  'William,Avery': (0.47, 200, 0.47, 200),
  'Casey,Avery': (0.44, 200, 0.44, 200)},
 'acc_by_action': {'to run': (0.47833333333333333,
   600,
   0.47833333333333333,
   600),
  'to call': (0.455, 600, 0.455, 600),
  'to come': (0.5133333333333333, 600, 0.5133333333333333, 600),
  'to read': (0.45, 600, 0.45, 600),
  'to go': (0.4683333333333333, 600, 0.468333333333333

# Passives 

The passive form reverses both the linear order and the syntactic roles of the agent and the patient. For example:

- Mary told Tom to wipe the counter 
- Tom was told by Mary to wipe the counter 

In the passive sentence, Tom is linearly further away from "wipe" and is also the grammatical subject of the matrix clause (though still the patient).

In [None]:
# The passive prompts reverse the agent/patient order, so the value passed as
# `correct_index` to Experiment.run changes from 1 (active runs) to 0 here.
# Presumably it is the position of the correct name in each pair -- confirm.
correct_index = 0 

## GPT3 Passive

In [None]:

# GPT-3 object-control run on the passive prompts.
gpt_kwargs = {"max_tokens": 2, "temperature": 0.0}
passive_gpt_object_control_experiment = Experiment("gpt3", "object-control-passive", FixedPassiveGPTPrompt, run_gpt_prompt, 1, gpt_kwargs)

passive_gpt_object_control_experiment.run(names, correct_index, verbs, actions, nicknames=nicknames)

passive_gpt_df = passive_gpt_object_control_experiment.format_results()

# Saved under the repo-relative results directory used by the other cells,
# instead of an absolute path that only exists on one machine.
passive_gpt_df.to_csv("../results/gpt_passive_object_control_swap_names.csv")
# passive_gpt_df = pd.read_csv("../results/gpt_passive_object_control_swap_names.csv")



In [None]:
# Display the accuracy report for the GPT-3 passive results in `passive_gpt_df`.
accuracy_report(passive_gpt_df)


## T5 for QA Passive

In [None]:

# T5 QA on the passive object-control prompts.
# (An earlier, now removed, version of this cell passed run_t5_prompt to
# Experiment directly and wrote its CSV to an absolute path.)
from hf_tools.hf import HuggingfaceRunFxn

wrapper_fxn = HuggingfaceRunFxn(
    "valhalla/t5-base-qa-qg-hl",
    device="cuda:1",
    constrained=False,
)

passive_t5_object_control_experiment = Experiment(
    "t5",
    "object-control-passive",
    FixedPassiveT5Prompt,
    wrapper_fxn,
    1,
    None,
)

passive_t5_object_control_experiment.run(
    names,
    correct_index,
    verbs,
    actions,
    do_swap=False,
    nicknames=nicknames,
    rate_limit_delay=None,
    overwrite=True,
)

# Format the results and save them next to the other result files.
passive_t5_df = passive_t5_object_control_experiment.format_results()
passive_t5_df.to_csv("../results/t5_passive_object_control.csv")

In [None]:
# Display the accuracy report for the T5 passive results in `passive_t5_df`.
accuracy_report(passive_t5_df)

## T0 Passive

In [None]:

import os

# Export the cache location BEFORE importing hf_tools.hf: transformers resolves
# TRANSFORMERS_CACHE when it is first imported, so setting it afterwards has no
# effect (hf_tools.hf presumably imports transformers -- confirm).
os.environ['TRANSFORMERS_CACHE'] = "/brtx/601-nvme1/estengel/.cache"

from hf_tools.hf import HuggingfaceRunFxn

# T0pp is run on CPU here, unlike the CUDA devices used for the other local models.
wrapper_fxn = HuggingfaceRunFxn("bigscience/T0pp", device="cpu", constrained=False)

passive_t0_object_control_experiment = Experiment("t0", "object-control-passive", FixedPassiveGPTPrompt, wrapper_fxn, 1, None)

passive_t0_object_control_experiment.run(names, correct_index, verbs, actions, do_swap=False, nicknames=nicknames, rate_limit_delay=None, overwrite=True)

passive_t0_df = passive_t0_object_control_experiment.format_results()

passive_t0_df.to_csv("../results/t0_passive_object_control.csv")

# Last expression of the cell: displays the accuracy report.
accuracy_report(passive_t0_df)



## GPT-Neo Passive

In [8]:
import os

# Export the cache location BEFORE importing hf_tools.hf: transformers resolves
# TRANSFORMERS_CACHE when it is first imported, so setting it afterwards has no
# effect (hf_tools.hf presumably imports transformers -- confirm).
os.environ['TRANSFORMERS_CACHE'] = "/brtx/601-nvme1/estengel/.cache"

from hf_tools.hf import HuggingfaceRunFxn

wrapper_fxn = HuggingfaceRunFxn("EleutherAI/gpt-neo-2.7B", device="cuda:1", constrained=False)

passive_gptneo_object_control_experiment = Experiment("gpt-neo-2.7b", "object-control-passive", FixedPassiveGPTPrompt, wrapper_fxn, 1, None)

passive_gptneo_object_control_experiment.run(names, correct_index, verbs, actions, do_swap=False, nicknames=nicknames, rate_limit_delay=None, overwrite=True)

passive_gptneo_df = passive_gptneo_object_control_experiment.format_results()

passive_gptneo_df.to_csv("../results/gpt_neo_2.7b_passive_object_control.csv")

# Last expression of the cell: displays the accuracy report.
accuracy_report(passive_gptneo_df)


  0%|          | 0/30 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to 

{'total': (1.0, 1500, 1.0, 1500),
 'acc_by_name': {'Stephanie,Nicole': (1.0, 100, 1.0, 100),
  'Stephanie,Joseph': (1.0, 100, 1.0, 100),
  'Stephanie,William': (1.0, 100, 1.0, 100),
  'Stephanie,Casey': (1.0, 100, 1.0, 100),
  'Stephanie,Avery': (1.0, 100, 1.0, 100),
  'Nicole,Joseph': (1.0, 100, 1.0, 100),
  'Nicole,William': (1.0, 100, 1.0, 100),
  'Nicole,Casey': (1.0, 100, 1.0, 100),
  'Nicole,Avery': (1.0, 100, 1.0, 100),
  'Joseph,William': (1.0, 100, 1.0, 100),
  'Joseph,Casey': (1.0, 100, 1.0, 100),
  'Joseph,Avery': (1.0, 100, 1.0, 100),
  'William,Casey': (1.0, 100, 1.0, 100),
  'William,Avery': (1.0, 100, 1.0, 100),
  'Casey,Avery': (1.0, 100, 1.0, 100)},
 'acc_by_action': {'to call': (1.0, 300, 1.0, 300),
  'to run': (1.0, 300, 1.0, 300),
  'to come': (1.0, 300, 1.0, 300),
  'to go': (1.0, 300, 1.0, 300),
  'to read': (1.0, 300, 1.0, 300)},
 'acc_by_verb': {'told': (1.0, 150, 1.0, 150),
  'urged': (1.0, 150, 1.0, 150),
  'reminded': (1.0, 150, 1.0, 150),
  'asked': (1.0, 15

## Jurassic Large Passive

In [None]:

# Jurassic Large object-control run on the passive prompts.
jurassic_kwargs = {"maxTokens": 2, "temperature": 0.0}
passive_jurassic_object_control_experiment = Experiment("jurassic-large", "object-control-passive", FixedPassiveGPTPrompt, run_ai21_prompt, 1, jurassic_kwargs)

passive_jurassic_object_control_experiment.run(names, correct_index, verbs, actions, nicknames=nicknames)
passive_jurassic_df = passive_jurassic_object_control_experiment.format_results()

# Saved under the repo-relative results directory used by the other cells,
# instead of an absolute path that only exists on one machine.
passive_jurassic_df.to_csv("../results/jurassic_passive_object_control_swap_names.csv")

# Reload of the file written above (the earlier commented line pointed at the
# active-voice results file by mistake):
# passive_jurassic_df = pd.read_csv("../results/jurassic_passive_object_control_swap_names.csv")

In [None]:
# Display the accuracy report for the Jurassic Large passive results in `passive_jurassic_df`.
accuracy_report(passive_jurassic_df)

## Jurassic Jumbo Passive

In [4]:
# Jurassic Jumbo object-control run on the passive prompts.
# correct_index is set again here so the cell does not depend on the earlier
# passive-section cell having been executed.
correct_index = 0
jurassic_kwargs = {"maxTokens": 2, "temperature": 0.0}
passive_jurassic_jumbo_object_control_experiment = Experiment("jurassic-jumbo", "object-control-passive", FixedPassiveGPTPrompt, run_ai21_jumbo_prompt, 1, jurassic_kwargs)

# rate_limit_delay / rate_limit_count throttle the API calls (presumably a 60 s
# pause every 19 requests -- confirm in Experiment.run).
passive_jurassic_jumbo_object_control_experiment.run(names, correct_index, verbs, actions, nicknames=nicknames, rate_limit_delay=60, rate_limit_count=19)
passive_jurassic_jumbo_df = passive_jurassic_jumbo_object_control_experiment.format_results()

# Saved under the repo-relative results directory used by the other cells,
# instead of an absolute path that only exists on one machine.
passive_jurassic_jumbo_df.to_csv("../results/jurassic_jumbo_passive_object_control_swap_names.csv")

# Reload of the file written above (the earlier commented line named the
# Jurassic Large variable and the active-voice file by mistake):
# passive_jurassic_jumbo_df = pd.read_csv("../results/jurassic_jumbo_passive_object_control_swap_names.csv")

100%|██████████| 30/30 [2:55:00<00:00, 350.02s/it]  
