In [1]:
import json

import pandas as pd

from api_tools import (FixedGPTPrompt,
                       FixedPassiveGPTPrompt,
                       FixedT5Prompt,
                       FixedPassiveT5Prompt,
                       run_ai21_jumbo_prompt,
                       run_ai21_prompt,
                       run_gpt_prompt,
                       run_t5_prompt)
from experiment import Experiment
from metrics import accuracy_report


# Object Control
We start with the object-control experiments and compare how four language models perform:
1. GPT3
2. T5 for QA 
3. Jurassic large 
4. Jurassic Jumbo

## Experimental settings
We're looking at different name pairs to control for gender. We have 2 male-female name pairs, 1 male-male pair, 1 female-female pair, and 1 neutral-neutral pair. 
We're also looking at 10 object-control verbs and 5 different actions. 

In [2]:
def _load_json(path):
    """Parse the JSON file at ``path`` and return its contents.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes, instead of being left open for the life of the kernel.
    """
    with open(path) as json_file:
        return json.load(json_file)


# Inputs shared by every experiment cell below.
names = _load_json("../data/professions.json")
# The 10 object-control matrix verbs used to build prompts.
verbs = ["told", "ordered", "called upon", "reminded", "urged", "asked", "persuaded", "convinced", "forced", "pushed"]
actions = _load_json("../data/verbs.json")
# Passed to Experiment.run as the index of the expected answer
# (presumably the position within each name pair -- TODO confirm against Experiment).
correct_index = 0

nicknames = _load_json("../data/nicknames_professions.json")

## GPT 3
 

In [None]:
# GPT-3 on the active object-control prompts: up to 4 completion tokens at temperature 0.0.
gpt_kwargs = {
    "max_tokens": 4,
    "temperature": 0.0,
}
gpt_object_control_experiment = Experiment(
    "gpt3",
    "object-control",
    FixedGPTPrompt,
    run_gpt_prompt,
    1,
    gpt_kwargs,
)
gpt_object_control_experiment.run(
    names,
    correct_index,
    verbs,
    actions,
    nicknames=nicknames,
)

# Tabulate the run and persist it so the results survive a kernel restart.
gpt_df = gpt_object_control_experiment.format_results()
gpt_df.to_csv("../results_profession/gpt_object_control_swap_names.csv")

In [None]:
# Accuracy summary for the GPT-3 object-control results (report format is defined in metrics.accuracy_report).
accuracy_report(gpt_df)

## Jurassic Large

In [None]:

# Jurassic Large on the active object-control prompts: up to 2 completion tokens at temperature 0.0.
jurassic_kwargs = {
    "maxTokens": 2,
    "temperature": 0.0,
}
jurassic_object_control_experiment = Experiment(
    "jurassic-large",
    "object-control",
    FixedGPTPrompt,
    run_ai21_prompt,
    1,
    jurassic_kwargs,
)
jurassic_object_control_experiment.run(
    names,
    correct_index,
    verbs,
    actions,
    nicknames=nicknames,
)

# Tabulate the run and persist it so the results survive a kernel restart.
jurassic_df = jurassic_object_control_experiment.format_results()
jurassic_df.to_csv("../results_profession/jurassic_object_control_swap_names.csv")

In [None]:
# Accuracy summary for the Jurassic Large object-control results (report format is defined in metrics.accuracy_report).
accuracy_report(jurassic_df)

## Jurassic Jumbo

In [None]:
# Jurassic Jumbo on the active object-control prompts: up to 2 completion tokens at temperature 0.0.
# run_ai21_jumbo_prompt comes from the notebook's top import cell.
jurassic_kwargs = {"maxTokens": 2, "temperature": 0.0}
jurassic_jumbo_object_control_experiment = Experiment(
    "jurassic-jumbo",
    "object-control",
    FixedGPTPrompt,
    run_ai21_jumbo_prompt,
    1,
    jurassic_kwargs,
)

# Seed the experiment from the CSV written by an earlier run of this cell.
# NOTE(review): presumably this lets an interrupted run resume without re-querying;
# confirm how Experiment.recover behaves when the file does not exist yet.
jurassic_jumbo_object_control_experiment.recover("../results_profession/jurassic_jumbo_object_control.csv")

# NOTE(review): rate_limit_delay / rate_limit_count presumably pause 60 s after every
# 19 requests to stay under the API quota -- confirm against Experiment.run.
jurassic_jumbo_object_control_experiment.run(
    names,
    correct_index,
    verbs,
    actions,
    nicknames=nicknames,
    do_swap=True,
    rate_limit_delay=60,
    rate_limit_count=19,
)
jurassic_jumbo_df = jurassic_jumbo_object_control_experiment.format_results()

# Written to the same path that recover() reads above.
jurassic_jumbo_df.to_csv("../results_profession/jurassic_jumbo_object_control.csv")


In [None]:
# Accuracy summary for the Jurassic Jumbo object-control results (report format is defined in metrics.accuracy_report).
accuracy_report(jurassic_jumbo_df)

# Passives 

The passive form reverses the linear order and the syntactic roles of the agent and patient. For example:

- Mary told Tom to wipe the counter 
- Tom was told by Mary to wipe the counter 

Now Tom is linearly further away from "wipe" and is also the grammatical subject of the matrix clause (though he is still the patient).

In [3]:
# Expected-answer index handed to every Experiment.run call in the passive section;
# re-stated here with the same value (0) used for the active experiments.
correct_index = 0 

## GPT3 Passive

In [4]:

# GPT-3 on the passive object-control prompts: up to 4 completion tokens at temperature 0.0.
gpt_kwargs = {
    "max_tokens": 4,
    "temperature": 0.0,
}
passive_gpt_object_control_experiment = Experiment(
    "gpt3",
    "object-control-passive",
    FixedPassiveGPTPrompt,
    run_gpt_prompt,
    1,
    gpt_kwargs,
)
passive_gpt_object_control_experiment.run(
    names,
    correct_index,
    verbs,
    actions,
    nicknames=nicknames,
)

# Tabulate the run and persist it so the results survive a kernel restart.
passive_gpt_df = passive_gpt_object_control_experiment.format_results()
passive_gpt_df.to_csv("../results_profession/gpt_passive_object_control_swap_names.csv")

100%|██████████| 30/30 [1:02:08<00:00, 124.30s/it]


In [None]:
# Accuracy summary for the GPT-3 passive results (report format is defined in metrics.accuracy_report).
accuracy_report(passive_gpt_df)

## Jurassic Large Passive

In [None]:
# Jurassic Large on the passive object-control prompts: up to 4 completion tokens at temperature 0.0.
jurassic_kwargs = {
    "maxTokens": 4,
    "temperature": 0.0,
}
passive_jurassic_object_control_experiment = Experiment(
    "jurassic-large",
    "object-control-passive",
    FixedPassiveGPTPrompt,
    run_ai21_prompt,
    1,
    jurassic_kwargs,
)
passive_jurassic_object_control_experiment.run(
    names,
    correct_index,
    verbs,
    actions,
    nicknames=nicknames,
)

# Tabulate the run and persist it so the results survive a kernel restart.
passive_jurassic_df = passive_jurassic_object_control_experiment.format_results()
passive_jurassic_df.to_csv("../results_profession/jurassic_passive_object_control_swap_names.csv")

In [None]:
# Accuracy summary for the Jurassic Large passive results (report format is defined in metrics.accuracy_report).
accuracy_report(passive_jurassic_df)

## Jurassic Jumbo Passive

In [None]:
# Jurassic Jumbo on the passive object-control prompts: up to 4 completion tokens at temperature 0.0.
# The experiment label and prompt class are the passive ones ("object-control-passive",
# FixedPassiveGPTPrompt), matching the GPT-3 and Jurassic Large passive cells. The active
# variants used here before meant this cell re-ran the active experiment while saving it
# under a passive filename.
# run_ai21_jumbo_prompt comes from the notebook's top import cell.
jurassic_kwargs = {"maxTokens": 4, "temperature": 0.0}
passive_jurassic_jumbo_object_control_experiment = Experiment(
    "jurassic-jumbo",
    "object-control-passive",
    FixedPassiveGPTPrompt,
    run_ai21_jumbo_prompt,
    1,
    jurassic_kwargs,
)

# NOTE(review): rate_limit_delay / rate_limit_count presumably pause 60 s after every
# 19 requests to stay under the API quota -- confirm against Experiment.run.
passive_jurassic_jumbo_object_control_experiment.run(
    names,
    correct_index,
    verbs,
    actions,
    nicknames=nicknames,
    do_swap=True,
    rate_limit_delay=60,
    rate_limit_count=19,
)
passive_jurassic_jumbo_df = passive_jurassic_jumbo_object_control_experiment.format_results()

passive_jurassic_jumbo_df.to_csv("../results_profession/jurassic_jumbo_passive_object_control.csv")
