In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from openai import OpenAI
from dotenv import load_dotenv
import os
import random
import numpy as np
import regex as re
import pandas as pd

# Load variables from a local .env file (if present) into the environment.
load_dotenv()
# Mirror OPENAI_KEY into OPENAI_API_KEY (presumably so that the bare OpenAI()
# call later in the notebook can find it -- confirm against the client docs).
# os.environ only accepts strings: assigning the None that os.getenv returns
# for a missing variable raises TypeError, so only copy the key when it is set.
_openai_key = os.getenv('OPENAI_KEY')
if _openai_key is not None:
    os.environ['OPENAI_API_KEY'] = _openai_key


  from .autonotebook import tqdm as notebook_tqdm


## Pilot experiments

In [3]:
import random

# Agents in the simulated world; each one gets a location history.
people = ['Alice', 'Bob', 'Joe', 'Dylan', 'Josh'] # idea: mind reading
# Every place an agent can be. The LAST entry is the location everyone starts in.
locations = ['hole_1', 'hole_2', 'hole_3', 'hole_4','field'] # idea: complicate the environment?
# Predicate name used when logging a move as relation(person, location, t).
relation = 'jumps_in' # idea: more actions/relations?
# idea: goal directedness

      
#current_locations = {p:[locations[-1]] for p in people}

# Update the state dict so that current_locations[person][t] is the person's location at time step t
def update_state(subject, new_loc, current_locations):
    """Advance every location history by one time step, in place.

    ``subject`` is recorded at ``new_loc``; every other person repeats their
    previous location, so all histories keep the same length.

    The people to update are taken from ``current_locations`` itself rather
    than from a notebook-level global, so the function works on any state
    dict it is handed.

    Args:
        subject: Name of the person who moves on this step.
        new_loc: Location the subject moves to.
        current_locations: Dict mapping person -> list of locations, one
            entry per time step. Mutated in place.

    Returns:
        None.
    """
    for person, history in current_locations.items():
        history.append(new_loc if person == subject else history[-1])

# Announce the shared starting location (the last entry of `locations`).
print('All start at ' + locations[-1])

def run_sim(steps, seed=None):
    """Simulate ``steps`` random single-person moves.

    Everyone starts at ``locations[-1]``. On each step one random person
    moves: someone whose current location contains the hole marker
    (``locations[0]`` minus its last two characters) goes back to
    ``locations[-1]``; anyone else goes to a random entry of
    ``locations[:-1]``.

    Args:
        steps: Number of moves to simulate.
        seed: Optional seed. When given, the module-level ``random``
            generator is reseeded, which also affects later users of it.

    Returns:
        ``(sequences, current_locations)``: the event log as one string of
        ``relation(person, location, t)`` lines, and a dict mapping each
        person to their location history (starting location first, then one
        entry appended per step by ``update_state``).
    """
    if seed is not None:
        random.seed(seed)

    # Fresh state per call: every history begins at the starting location.
    current_locations = {name: [locations[-1]] for name in people}
    events = []
    for t in range(steps):
        mover = random.choice(people)
        in_hole = locations[0][:-2] in current_locations[mover][-1]
        # The second random draw only happens when the mover is not in a hole.
        destination = locations[-1] if in_hole else random.choice(locations[:-1])
        update_state(mover, destination, current_locations)
        events.append(f'{relation}({mover}, {destination}, {t})\n')
    return ''.join(events), current_locations


def last_observed_change(subject, target, t, current_locations):
    """Return where ``subject`` believes ``target`` is at time step ``t``.

    The belief is whatever the subject last witnessed about the target:

    * If both are in the same place at ``t``, the subject sees the target
      there.
    * Otherwise take the most recent earlier step at which they shared a
      location. If on the very next step the subject stayed and the target
      left, the subject watched the move and knows the destination.
    * If instead the subject left (or both moved), the subject still believes
      the target is where they were last together.

    Args:
        subject: Person whose belief is being asked about.
        target: Person whose location is being tracked.
        t: Time index into the location histories.
        current_locations: Dict mapping person -> list of locations per step.

    Returns:
        ``(location, time)``: the believed location and the time index of the
        observation it rests on. If the two never shared a location up to
        ``t``, falls back to ``(target_history[0], 0)``.
    """
    target_history = current_locations[target]
    subject_history = current_locations[subject]

    # Case 1: together right now.
    if target_history[t] == subject_history[t]:
        return target_history[t], t

    # Most recent step before t at which they shared a location. The scan
    # must stop at the first hit; otherwise it ends on the earliest one.
    for prev in range(t - 1, -1, -1):
        if target_history[prev] == subject_history[prev]:
            break
    else:
        # Never together: keep the historical fallback of the initial state.
        return target_history[0], 0

    # prev < t here, so prev + 1 is a valid index no later than t.
    nxt = prev + 1
    subject_stayed = subject_history[nxt] == subject_history[prev]
    target_moved = target_history[nxt] != target_history[prev]
    if subject_stayed and target_moved:
        # Case 2: the subject saw the target leave for its new location.
        return target_history[nxt], nxt
    # Case 3: the subject left first, so the target is believed to remain.
    return target_history[prev], prev
   
# def last_observed_change_range(subject, target,start_t, end_t):
#     target_history = current_locations[target]
#     subject_history = current_locations[subject]
#     #check first case
#     if target_history[end_t] == subject_history[end_t]:
#         return target_history[end_t], end_t
    
#     # Check the second two cases
#     prev = 0
#     for t_i in range(end_t,start_t-1,-1):
#         if target_history[t_i] == subject_history[t_i]:
#             prev = t_i
#     if subject_history[prev+1] == subject_history[prev+1] and target_history[prev+1] != target_history[prev+1]:
#         return target_history[prev+1], prev+1
#     else:
#         return target_history[prev], prev   
   
    
def print_sequences(sequences, relation):
    """Render an event log as plain-English sentences.

    Each ``relation(subject, location, t)`` line becomes
    ``"<subject> <verb> <location>"``. The verb is ``relation`` with
    underscores turned into spaces; for locations that do not contain
    ``"hole"`` every ``"in"`` in the verb is replaced by ``"out to the"``.
    The time field is ignored.

    Every sentence is printed as it is produced.

    Args:
        sequences: Newline-terminated event lines.
        relation: Predicate name used in ``sequences``.

    Returns:
        All sentences joined with ``'. '``.
    """
    verb_in = relation.replace("_", " ")
    verb_out = verb_in.replace("in", "out to the")
    prefix = f'{relation}('

    sentences = []
    # The text after the final newline is dropped by the [:-1] slice.
    for line in sequences.split('\n')[:-1]:
        fields = line.replace(prefix, '').replace(')', '')
        # `loc` keeps the space that follows the comma; it doubles as the
        # separator between verb and location in the sentence.
        subject, loc, _ = fields.split(',')
        verb = verb_in if "hole" in loc else verb_out
        sentence = f'{subject} {verb}{loc}'
        print(sentence)
        sentences.append(sentence)
    return '. '.join(sentences)


# Where does Bob think that Alice thinks that ... Joe is?
# If Bob wrote a message in LOCATION, would Alice see?
# Bob hates anyone who jumps in hole 2. Who would Bob hate?
# Question event, who/multiple ppl who observed an event
# An observation change event is when one actor changes location without another actor viewing it.
            

All start at field


In [4]:
# run_sim builds its own state and returns it; no global is updated.
# `seqs` is therefore the (sequences, current_locations) tuple, not just the log.
seqs = run_sim(50)
#print(seqs)

In [5]:
# Where does Bob think that Alice thinks that ... Joe is?
# If Bob wrote a message in LOCATION, would Alice see?
# Bob hates anyone who jumps in hole 2. Who would Bob hate?
# Question event, who/multiple ppl who observed an event

 
def find_question_events(t, current_locations):
    """Build the people-by-people belief matrix for time step ``t``.

    Entry ``[i][j]`` is the ``(location, time)`` pair returned by
    ``last_observed_change`` with ``people[i]`` as subject and ``people[j]``
    as target. Diagonal entries (a person about themselves) are ``None``.

    Args:
        t: Time index to evaluate beliefs at.
        current_locations: Dict mapping person -> list of locations per step.

    Returns:
        A list of rows, one per person in ``people``, in ``people`` order.
    """
    def belief(observer, observed):
        if observer == observed:
            return None
        where, when = last_observed_change(observer, observed, t, current_locations)
        return (where, when)

    return [[belief(observer, observed) for observed in people]
            for observer in people]

In [6]:
# At each time step, who was in what location

# "Who observed who observed whom" becomes a matter of tracing steps on this map.
def make_temporal_tracking_map(steps, current_locations):
    """Group people by location for each time index ``0 .. steps-1``.

    Both the set of people and the starting location are read from
    ``current_locations``, so the map stays correct if the start is not
    ``'field'`` or the notebook-level ``people`` list changes.

    Args:
        steps: Number of time indices to cover. Index 0 is always emitted,
            even when ``steps`` is 0 or negative.
        current_locations: Dict mapping person -> list of locations per step.
            Every history needs at least ``max(steps, 1)`` entries.

    Returns:
        A list whose element ``t`` is a dict mapping location -> set of
        people at that location at time index ``t``. Unoccupied locations
        are absent from the dict.

    NOTE(review): ``run_sim(steps)`` yields histories with ``steps + 1``
    entries, so the state after the final move is not included here --
    confirm that this is intended.
    """
    tracking_map = []
    for t in range(max(steps, 1)):
        location_to_ppl = {}
        for person, history in current_locations.items():
            location_to_ppl.setdefault(history[t], set()).add(person)
        tracking_map.append(location_to_ppl)
    return tracking_map


In [7]:
#Fix the person, see if they cause an observance event
def find_observance_event(temporal_map, target, current_locations):
    """Find moves of ``target`` that some earlier witnesses did not see.

    Moves are handled in pairs. The first move of a pair records as
    observers everyone who shared the target's location just before it.
    The next move is then checked: observers who are neither at the old
    location when the target leaves nor at the new one when the target
    arrives are unaware of it. After the second move the observer set is
    cleared and the search starts over.

    Args:
        temporal_map: List of ``{location: set_of_people}`` dicts, one per
            time index.
        target: Person whose moves are examined.
        current_locations: Dict mapping person -> list of locations per step.

    Returns:
        A list of ``(unaware_subjects, start, end)`` tuples. ``start`` is the
        time index just before the witnessed move and ``end`` the index just
        after the unseen move. ``unaware_subjects`` can be empty when every
        observer who missed the arrival saw the departure. No tuple is
        produced when all observers are at the new location on arrival.
    """
    target_locations = current_locations[target]
    observers = set()
    start = 0
    res = []
    for t in range(len(temporal_map) - 1):
        if target_locations[t] == target_locations[t + 1]:
            continue  # the target did not move on this step

        if not observers:
            # First move of a pair: remember who was with the target.
            observers = temporal_map[t][target_locations[t]].difference({target})
            start = t
            continue

        # Second move of a pair: who missed it?
        new_loc = target_locations[t + 1]
        # People who see the target arrive at the new location.
        observe_incoming = temporal_map[t + 1][new_loc]
        # The original compared the intersection set itself with an int,
        # which is always unequal; the check is whether every observer is
        # present at the new location.
        if not observers.issubset(observe_incoming):
            # People who see the target leave the old location.
            observe_outgoing = temporal_map[t][target_locations[t]]
            unaware_subjects = observers.difference(observe_incoming).difference(observe_outgoing)
            res.append((unaware_subjects, start, t + 1))
            start = 0
        observers = set()
    return res
            
#find_observance_event(tm, 'Bob')             

### Actual test

In [8]:
# Build the zero-shot question set from one freshly simulated story.
n_steps = 20
# No seed is passed, so the story (and everything derived from it) differs per run.
test_sims, current_locations_main = run_sim(n_steps)
story = print_sequences(test_sims, relation)
# `story` is joined with '. ', so splitting on '.' leaves a leading space on
# every sentence after the first.
s = story.split('.')
tm_test = make_temporal_tracking_map(n_steps, current_locations_main)

# NOTE(review): people/locations are re-assigned here with the same values as
# at the top of the notebook, after run_sim has already read them.
people = ['Alice', 'Bob', 'Joe', 'Dylan', 'Josh'] # mind reading
# Last location is initial one
locations = ['hole_1', 'hole_2', 'hole_3', 'hole_4','field'] # complicate the environment?

# Find places to ask questions.

# Each entry of `res` is (story sentences, question text, expected answer).
res = []
for p in people:
    # Each oe is (unaware_subjects, start, end) from find_observance_event.
    obs_events = find_observance_event(tm_test, p, current_locations_main)
    for oe in obs_events:
        # Skip events that every observer witnessed.
        if len(oe[0]) != 0:
            # Cut the story right after the unseen move (the first oe[2] sentences).
            prompt_story = s[:oe[2]]
            for ob in oe[0]:
                # Debug output: time index the label is read from.
                print(oe[2]-1)
                # Label: where p was just before the unseen move, i.e. what
                # the unaware observer still believes.
                res.append((prompt_story, f'Where does {ob} think {p} is?', current_locations_main[p][oe[2]-1]))
print(res)

Dylan jumps in hole_4
Bob jumps in hole_3
Josh jumps in hole_4
Alice jumps in hole_1
Dylan jumps out to the field
Bob jumps out to the field
Josh jumps out to the field
Josh jumps in hole_4
Josh jumps out to the field
Josh jumps in hole_4
Alice jumps out to the field
Dylan jumps in hole_3
Dylan jumps out to the field
Alice jumps in hole_1
Bob jumps in hole_1
Joe jumps in hole_4
Bob jumps out to the field
Dylan jumps in hole_4
Alice jumps out to the field
Dylan jumps out to the field
18
18
5
5
16
4
4
6
[(['Dylan jumps in hole_4', ' Bob jumps in hole_3', ' Josh jumps in hole_4', ' Alice jumps in hole_1', ' Dylan jumps out to the field', ' Bob jumps out to the field', ' Josh jumps out to the field', ' Josh jumps in hole_4', ' Josh jumps out to the field', ' Josh jumps in hole_4', ' Alice jumps out to the field', ' Dylan jumps in hole_3', ' Dylan jumps out to the field', ' Alice jumps in hole_1', ' Bob jumps in hole_1', ' Joe jumps in hole_4', ' Bob jumps out to the field', ' Dylan jumps i

In [9]:
# Build the few-shot example set from a second story with a fixed seed.
n_steps = 20
# seed=47 makes this story reproducible (it reseeds the global `random` module).
test_sims_test, current_locations_test = run_sim(n_steps, seed=47)
story_test = print_sequences(test_sims_test, relation)
# Splitting on '.' leaves a leading space on every sentence after the first.
s_test = story_test.split('.')
tm_test_fewshot = make_temporal_tracking_map(n_steps, current_locations_test)

# NOTE(review): same values as at the top of the notebook; re-assigned after use.
people = ['Alice', 'Bob', 'Joe', 'Dylan', 'Josh'] # mind reading
# Last location is initial one
locations = ['hole_1', 'hole_2', 'hole_3', 'hole_4','field'] # complicate the environment?

# Find places to ask questions.
# TODO(review): an earlier note asked why the labels were taken from the main
# story rather than this one. The code below reads current_locations_test, so
# this looks resolved -- confirm and drop this note.
# Each entry of `res_test` is (story sentences, question text, expected answer).
res_test = []
for p in people:
    # Each oe is (unaware_subjects, start, end) from find_observance_event.
    obs_events = find_observance_event(tm_test_fewshot, p, current_locations_test)
    for oe in obs_events:
        # Skip events that every observer witnessed.
        if len(oe[0]) != 0:
            # Cut the story right after the unseen move (the first oe[2] sentences).
            prompt_story = s_test[:oe[2]]
            for ob in oe[0]:
                #print(oe[2]-1)
                # Label: where p was just before the unseen move.
                res_test.append((prompt_story, f'Where does {ob} think {p} is?', current_locations_test[p][oe[2]-1]))

Joe jumps in hole_1
Dylan jumps in hole_4
Josh jumps in hole_3
Joe jumps out to the field
Josh jumps out to the field
Dylan jumps out to the field
Dylan jumps in hole_1
Dylan jumps out to the field
Alice jumps in hole_1
Alice jumps out to the field
Joe jumps in hole_4
Josh jumps in hole_1
Joe jumps out to the field
Josh jumps out to the field
Dylan jumps in hole_2
Bob jumps in hole_3
Dylan jumps out to the field
Bob jumps out to the field
Alice jumps in hole_3
Joe jumps in hole_3


In [16]:
# Number of story sentences included in each question's prompt.
[len(r[0]) for r in res]

[19, 19, 6, 6, 17, 5, 5, 7]

##### Run GPT

In [11]:

# Client created with default settings; no API key is passed explicitly here.
client = OpenAI()
# Instruction placed ahead of each story; it names the shared starting
# location (the last entry of `locations`).
initial_prompt = f'Read the following story and answer the question at the end. Note that all characters start in the {locations[-1]}.'
def prompt_gpt(prompt, model="gpt-4"):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full text of the user message.
        model: Name of the chat model to query. Defaults to ``"gpt-4"``,
            the value that used to be hard-coded, so existing calls behave
            the same; pass another name to compare models.

    Returns:
        The message content of the first returned choice.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "user", "content": prompt}
        ],
    )

    return response.choices[0].message.content

In [12]:
# Collected (model answer, label) pairs; stays empty while the loop below is commented out.
results = []
# Few-shot block: every example except the last entry of res_test, formatted
# as "<story>\nQ: <question>\nA: <answer>\n\n".
example_string = ''
for r in res_test[:-1]:
    # r[0] is a list of sentences; joining with '.' restores the story text
    # (the sentences kept their leading spaces when the story was split).
    example_string += f'{".".join(r[0])}\nQ: {r[1]}\nA: {r[2]}\n\n'
# print(example_string)
    
# NOTE(review): in the disabled loop below, r[0] is interpolated directly, so
# the prompt would contain the Python list repr rather than joined text.
# for r in res:
#     #gpt_res = prompt_gpt(f'{initial_prompt}\n{example_string}{r[0]}\nQ: {r[1]}\nA:')
#     gpt_res = prompt_gpt(f'{initial_prompt}\n{r[0]}\n{r[1]}')
#     results.append((gpt_res, r[2]))

In [13]:
# len(res_test),[len(r[0]) for r in res_test]

In [14]:
# Show the collected (model answer, label) pairs; empty unless the GPT loop has been run.
results

[]

#### Results


| Test                         | Acc(3.5) | Acc(4.0) |
|------------------------------|-------|---------|
| Test 1                       | 4/4       | 0/4    |
| Test 2                       | 8/9     | 4/9     |
| Test 3 (longer stories)      | 7/9     | 1/9     |
| Test 4(3 people)             | 7/7     | 4/7     |


RESULTS AFTER FIXING THE BUG

| Test                         | Acc(3.5) | Acc(4.0) |
|------------------------------|-------|---------|
| Test 1 - seq len 20, diff stories, zero-shot          | 0.86| 0.65|
| Test 2 - same as above but fix the same stories for each model          | 0.85     | 0.68  |
| Test 3 - seq len 40, fixed stories,      |   0.84   | 0.37     |
| Test 4 - seq len 20, 4-shot             |   0.88   |   0.92   |
| Test 5 - seq len 40, 4-shot             |   0.80   |   0.93   |
| Test 6 - seq len 40, 1 question per story     |   0.8(0.61,0.99)   |   0.55(0.31,0.79)   |
| Test 7 - seq len 40, 1 question per story, 4-shot     |   0.65(0.42,0.88)   |   0.8(0.61,0.99)   |





In [2]:
import pandas as pd
# Load the saved per-question answers (path is relative to the working directory).
# NOTE(review): the 'Unnamed: 0' column in the output suggests the CSV was
# written with its index; index_col=0 may be wanted -- confirm.
df = pd.read_csv('results.csv')
df.head()

Unnamed: 0,Prompt,GPT3.5,GPT4,Label
0,Joe jumps in hole_2 Josh jumps in hole_1 Bob...,hole_2,hole_2,hole_2
1,Joe jumps in hole_2 Josh jumps in hole_1 Bob...,hole_2,hole_2,hole_2
2,Alice jumps in hole_4 Alice jumps out to the ...,hole_4,hole_4,hole_4
3,Alice jumps in hole_4 Alice jumps out to the ...,hole_4,hole_4,hole_4
4,Bob jumps in hole_1 Alice jumps in hole_3 Dy...,hole_3,hole_3,hole_3


In [11]:
# Rows where the GPT4 answer is not string-identical to the label.
# NOTE(review): exact matching treats variants such as 'the field' and 'field'
# as different answers -- confirm that the answers were normalised beforehand.
err = df[df['GPT4'] != df['Label']]
err

Unnamed: 0,Prompt,GPT3.5,GPT4,Label
6,Dylan jumps in hole_1 Josh jumps in hole_2 A...,hole_4,the field,hole_1
8,Alice jumps in hole_2 Bob jumps in hole_1 Bo...,hole_4,hole_4,hole_1
16,Bob jumps in hole_2 Alice jumps in hole_3 Dy...,hole_2,field,hole_2
38,Alice jumps in hole_3 Bob jumps in hole_1 Jo...,hole_3,hole_3,hole_2


In [12]:
# Print each mis-answered prompt followed by a blank line. A plain loop is
# used so the cell does not build and display a throwaway list of None values.
for e in err['Prompt']:
    print(e, '\n')

Dylan jumps in hole_1  Josh jumps in hole_2  Alice jumps in hole_4  Alice jumps out to the field  Dylan jumps out to the field  Dylan jumps in hole_4  Joe jumps in hole_1  Josh jumps out to the field  Dylan jumps out to the field  Alice jumps in hole_1  Dylan jumps in hole_4  Josh jumps in hole_1  Josh jumps out to the field  Bob jumps in hole_4  Dylan jumps out to the field  Dylan jumps in hole_1  Dylan jumps out to the field  Josh jumps in hole_3  Dylan jumps in hole_2  Joe jumps out to the field Where does Bob think Joe is? 

Alice jumps in hole_2  Bob jumps in hole_1  Bob jumps out to the field  Josh jumps in hole_2  Bob jumps in hole_1  Alice jumps out to the field  Bob jumps out to the field  Josh jumps out to the field  Dylan jumps in hole_3  Alice jumps in hole_4  Joe jumps in hole_2  Joe jumps out to the field  Alice jumps out to the field  Joe jumps in hole_1  Alice jumps in hole_2  Josh jumps in hole_2  Joe jumps out to the field  Alice jumps out to the field  Joe jumps in h

[None, None, None, None]