In [26]:
import datasets
import os
import openai
import numpy as np
with open(os.path.expanduser('~/.openai_api_key'), 'r') as file:
    openai.api_key = file.read().replace('\n', '')

import adatest

In [93]:
d = datasets.load_dataset('bigbench', 'anachronisms')



  0%|          | 0/3 [00:00<?, ?it/s]

In [95]:
import numpy as np
inputs = d['train']['inputs'] + d['validation']['inputs']
inputs = [x.split('\n')[0] for x in inputs]
labels = np.array([int(x[0] == 'Yes') for x in d['train']['targets'] + d['validation']['targets']])

In [101]:
class OpenAIModel(adatest.Model):
    def __init__(self, model="text-davinci-002", quote="\"", temperature=0.7, top_p=1, max_length=30, n=1):
        self.model = model
        self.api_key = openai.api_key
        self.quote = quote
        self.temperature = temperature
        self.top_p = top_p
        self.max_length = max_length
        self.n = n
    def __call__(self, strings):
        resp = openai.Completion.create(
            model=self.model,
            prompt=strings,
            max_tokens=self.max_length,
            temperature=self.temperature,
            top_p=self.top_p,
            n=self.n,
            stop=self.quote,
        )
        return [x["text"] for x in resp['choices']]

gpt3 = OpenAIModel(model="text-davinci-002",  max_length=200, quote='', n=1)


In [231]:
def propose_decomposition(n=20):
    gpt3 = OpenAIModel(model="text-davinci-002",  max_length=400, quote='---', n=n)
    prompt = '''I want to break down the task of figuring out whether a sentence contains anachronisms or not, into individual steps. An anachronism is a mistake in chronology, or a person, thing, or event that is out of its proper time. Here are examples of input-output pairs for the task I'm trying to break down:
----
Input: George Washington fought in the American Civil War.
Output: No
Input: The Mongolian horse rider used his bow to hunt the velociraptor.
Output: Yes
Input: Beats from the MPC3000 helped inspire many original blues artists.
Output: No
Input: Attila the Hun acted in the live-action remake of Mulan.
Output: Yes
Input: Kurt Cobain starred in the 1990 television show "Twin Peaks".
Output: Yes
----
Steps:
1.'''
    return gpt3(prompt)


In [260]:
decompositions = propose_decomposition(50)

In [261]:
def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]

def get_anachronism_fn(decomposition, batch_size=10):
    decomposition = '1.'+ decomposition
    last_n = int(re.findall(r'(\d+)\.', decomposition)[-1])
    decomposition += '\n%s. Output YES if there is an anachronism, and NO otherwise' % (last_n + 1)
    def decomposition_fn(sentences):
        gpt3 = OpenAIModel(model="text-davinci-002",  max_length=400, quote='---', n=1)
        out = []
        for chunk in chunks(sentences, batch_size):
            prompts = ['''Figure out whether a sentence contains anachronisms or not, using the following steps
Steps:
%s
----
Sentence: %s
1.''' % (decomposition, x) for x in chunk]
            out.extend(gpt3(prompts))
        return out
    return decomposition_fn

In [262]:
idxs = np.random.choice(len(inputs), 50, replace=False)
labs = np.array([labels[i] for i in idxs])
subset = [inputs[i] for i in idxs]

In [None]:
preds = []
pps = []
accs = []
for z, decomposition in enumerate(decompositions):
    print('Decomposition', z)
    fn = get_anachronism_fn(decomposition, batch_size=20)
    this_preds = fn(subset)
    pp = np.array([1 if x.lower().endswith('yes') else 0 for x in this_preds])
    preds.append(this_preds)
    pps.append(pp)
    accs.append((pp==labs).mean())
    print((pp==labs).mean())
        

Decomposition 0
0.6
Decomposition 1
0.74
Decomposition 2
0.54
Decomposition 3
0.56
Decomposition 4
0.6
Decomposition 5


In [None]:
best = []
best_accs = []
for x in np.argsort(accs[-10:]):
    fn = get_anachronism_fn(decompositions[x], batch_size=20)
    this_preds = fn(inputs)
    best.append(this_preds)
    pp = np.array([1 if x.lower().endswith('yes') else 0 for x in this_preds])
    acc = (pp==labels).mean()
    best_accs.append(acc)
    print(acc)
    

In [250]:
# pp

In [251]:
# labels

In [258]:
[accs[i] for i in np.argsort(accs)[-10:]]

[0.7, 0.7, 0.7, 0.7, 0.72, 0.72, 0.72, 0.72, 0.74, 0.76]

In [228]:
accs[10]

0.85

In [221]:
nidxs = np.random.choice(len(inputs), 50, replace=False)
nlabs = np.array([labels[i] for i in nidxs])
nsubset = [inputs[i] for i in nidxs]
this_preds = fn(subset)
pp = np.array([1 if x.lower().endswith('yes') else 0 for x in this_preds])
print((pp==labs).mean())


0.7


In [229]:
fn = get_anachronism_fn(decompositions[4], batch_size=20)
this_preds = fn(inputs)

In [224]:
pp = np.array([1 if x.lower().endswith('yes') else 0 for x in this_preds])
print((pp==labels).mean())

0.7347826086956522


In [230]:
pp = np.array([1 if x.lower().endswith('yes') else 0 for x in this_preds])
print((pp==labels).mean())

0.6521739130434783


In [None]:
pp = np.array([1 if x.lower().endswith('yes') else 0 for x in this_preds])

In [169]:
an = get_anachronism_fn(decompositions[0])

In [171]:
an(inputs[1])

[' The subject of the sentence is Ravi Shankar.\n2. Ravi Shankar is from the 20th century.\n3. The sentence is set in the 21st century.\n4. The time periods from steps 2 and 3 are different.\n5. The sentence contains an anachronism.\n6. YES']

In [153]:
import re

In [154]:
re.findall(r'(\d+)\.', decompositions[0], )

['2', '3', '4', '3', '5']

In [152]:
decompositions[19]

" Identify the subject of the sentence.\n2. Research the subject to determine its time period.\n3. Compare the subject's time period to the time period mentioned in the sentence.\n4. If the subject's time period is different from the time period mentioned in the sentence, the sentence contains an anachronism."

In [109]:
def anachronism(x):
    gpt3 = OpenAIModel(model="text-davinci-002",  max_length=400, quote='---', n=1)
    prompt = '''Figure out whether a sentence contains anachronisms or not, using the following steps
Steps:
1. Read the sentence and identify the subject(s) and verb(s).
2. Research the subject(s) and verb(s) to see if they are appropriate for the time period mentioned in the sentence.
3. If the subject(s) and verb(s) are appropriate for the time period, then the sentence does not contain an anachronism. If the subject(s) and verb(s) are not appropriate for the time period, then the sentence does contain an anachronism.
4. Output YES if there is an anachronism, and NO otherwise
----
Sentence: %s
1.''' % x
    return gpt3(prompt)

In [112]:
idxs = np.random.choice(len(inputs), 20, replace=False)

In [130]:
preds = []
for x in inputs:
    a = anachronism(x)
    preds.append(a)

In [131]:
labs = np.array([labels[i] for i in idxs])
pp = np.array([1 if x[0].lower().endswith('YES') else 0 for x in preds])
pp2 = [0 if x[0].endswith('NO') else 1 for x in preds]

In [134]:
(pp == np.array(labels)).mean()

0.7086956521739131

In [105]:
inputs[0]

'Dwight Eisenhower was a Chief Master Sergeant of the Space Force.'

In [108]:
inputs[1]

"Ravi Shankar's two favorite musicians were Bessie Smith and Shakira."

In [110]:
a = anachronism(inputs[1])
print(a[0])

 Read the sentence and identify the subject(s) and verb(s).
Subject(s): Ravi Shankar, Bessie Smith, Shakira
Verb(s): were

2. Research the subject(s) and verb(s) to see if they are appropriate for the time period mentioned in the sentence.
Ravi Shankar was an Indian musician born in 1920. Bessie Smith was an American singer born in 1894. Shakira is a Colombian singer born in 1977.

3. If the subject(s) and verb(s) are appropriate for the time period, then the sentence does not contain an anachronism. If the subject(s) and verb(s) are not appropriate for the time period, then the sentence does contain an anachronism.
The sentence does not contain an anachronism.

4. Output YES if there is an anachronism, and NO otherwise
NO


In [32]:
def anachronism(x):
    gpt3 = OpenAIModel(model="text-davinci-002",  max_length=200, quote='---', n=1)
    prompt = '''Given a sentence and the time periods of each entity in it, tell me if it could have happened or not.
Sentence: I wrote about shakespeare
Entities and dates:
I -> 21st century
Shakespeare -> 16th century
Could the sentence be true based on the dates alone: Yes
----
Sentence: Shakespeare wrote about me

Entities and dates:
Shakespeare -> 16th century
I -> 21st century

Could the sentence be true based on the dates alone: No
----
Sentence: %s''' % x
    return gpt3(prompt)


In [69]:
def anachronismv2(x):
    gpt3 = OpenAIModel(model="text-davinci-002",  max_length=200, quote='---', n=1)
    prompt = '''Given a sentence tell me if it could have happened or not based on the time periods of the entities.
Sentence: I wrote about shakespeare
Could the sentence be true based on the time periods alone: Yes
----
Sentence: Shakespeare wrote about me
Could the sentence be true based on the time periods alone: No
----
Sentence: %s''' % x
    return gpt3(prompt)

In [78]:
def anachronismv3(x):
    prompt = '''Given a sentence, extract the entities and their relationships, and tell me if they are anachronistic.
Sentence: I wrote about shakespeare
Relationships:
- (I, write about, shakespeare) -> not anachronistic, 
----
Sentence: Shakespeare wrote about me
Relationships:
- (shakespeare, write about, me) -> anachronistic
----
Sentence: The builders of the pyramids at Giza listened to jazz during their break.
Relationships:
- (builders, listen to, jazz) -> anachronistic
----
Sentence: %s
''' % x
    return gpt3(prompt)

In [41]:
answers = []
for x in inputs:
    answers.append(anachronism(x))

In [70]:
answers2 = []
for x in inputs:
    answers2.append(anachronismv2(x))

In [79]:

answers3 = []
for x in inputs:
    answers3.append(anachronismv3(x))

In [56]:
preds = np.array([int(x[0].endswith('No')) for x in answers])

In [71]:
preds2 = np.array([int(x[0].endswith('No')) for x in answers2])

In [83]:
preds3 = np.array([1 - int('not anachronistic' in x[0]) for x in answers3])

In [68]:
(preds == labels).mean()

0.7391304347826086

In [72]:
(preds2 == labels).mean()

0.6956521739130435

In [85]:
(preds3 == labels).mean()

0.6304347826086957

In [67]:
for x in np.where(preds != labels)[0]:
    print(inputs[x])
    print(answers[x][0])
    print(labels[x])
    print('----')

Jason, the first person to explore the ancient pyramid in over 2000 years, opened a sealed box inside and found a floppy disk.


Jason -> 21st century
ancient pyramid -> ?

Could the sentence be true based on the dates alone:

The sentence could be true if the ancient pyramid is from the 21st century.
1
----
The first ever movie in color depicted the life of Sacagawea.


Entities and dates:
The first ever movie in color -> late 19th century
Sacagawea -> late 18th century - early 19th century

Could the sentence be true based on the dates alone: No
0
----
Jason, while exploring an ancient pyramid, opened a sealed box inside and found a floppy disk.


Entities and dates:
Jason -> 21st century
floppy disk -> 1970s-2000s

Could the sentence be true based on the dates alone: No
0
----
During their meetings in Bali, George Washington and the delegate of the Tokugawa shogunate exchanged gifts.


Entities and dates:
George Washington -> 18th century
Tokugawa shogunate -> 17th century

Could th