In [2]:
%%time
import pal
import json
import random
import io
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta, MO
from pal.prompt import math_prompts, colored_object_prompt, penguin_prompt, date_understanding_prompt, algorithmic_prompt
from tqdm.notebook import tqdm

CPU times: user 804 ms, sys: 133 ms, total: 936 ms
Wall time: 1.39 s


In [3]:
# Toy table: one row per object, columns given as parallel lists.
data = {
    "Color": ["Cyan", "Green", "Red", "Grey"],
    "Object": ["Phone", "Apple", "Cup", "Lamp"],
    "Position": [1, 2, 3, 4],
}

# Build the frame and exercise the two lookups used later on:
# selecting rows by a condition and reading a single cell.
df = pd.DataFrame(data)
cup_rows = df[df["Object"] == "Cup"]  # same rows as df.query('Object == "Cup"')
first_color = df.loc[0, "Color"]      # cell in row label 0, column "Color"
print(cup_rows)
print(first_color)

  Color Object  Position
2   Red    Cup         3
Cyan


In [6]:
# Instruction block prepended to every question. It defines the Thought/Action
# protocol and the seven NAME_START[...]NAME_END actions the model may emit.
# Every action referenced in a description is one that this prompt defines.
prompt = """Whenever you synthesize information and perform reasoning, it is a Thought. When you need to perform an operation on a table, it is an Action. There are 7 types of actions: 
(1) CREATE_START[data]CREATE_END: CREATE takes a data definition of a table which will be used to generate a pandas dataframe, so make sure that the input does not contain anything unnecessary for the pandas dataframe input generation. Write the data in json format.
(2) GET_VALUE_START[row_number;column]GET_VALUE_END: GET_VALUE takes the row_number, e.g., 0 is the 1st row of the table, and the name of a column, e.g., age, and returns the value of the cell, e.g., the age in the first row of the table.
(3) FIND_START[query]FIND_END: FIND takes a query and returns the entries in the table that satisfy the query. For example, FIND_START[height < 13]FIND_END would return the entries whose value in the column "height" is less than 13. IMPORTANT: This will not change the entries of the table. 
(4) FILTER_START[query]FILTER_END: FILTER takes a query, similarly to FIND, but instead changes the table such that only entries that satisfy the query are preserved. For example, FILTER_START[age < 18]FILTER_END would alter the table so that only entries whose value in the column "age" is less than 18 would be preserved. IMPORTANT: This DOES change the table.
(5) ADD_START[row]ADD_END: ADD takes a row and adds it to the table. Write the data in json format. NOTE: This does not necessarily preserve any previous sorted order.
(6) SUM_START[column]SUM_END: SUM takes a column and takes the sum of all the elements in that column. For example, SUM_START[age]SUM_END would sum all the values in the column "age". 
(7) FINISH_START[answer]FINISH_END: When you finish the question, write your answer as FINISH_START[answer]FINISH_END and terminate your response.
Terminate your response after an action. Then, an Observation is returned by the user. IMPORTANT: YOU DO NOT WRITE OBSERVATIONS. THEY WILL BE PROVIDED TO YOU. You will be given a question and you will perform Thoughts and Actions until you get the answer. When you get the answer, you will perform the Action called FINISH. While you are reasoning out solutions, you will continually refer back to relevant portions of the question and employ logical reasoning to determine your next action. Ensure all the inputs to your actions are perfectly accurate.
"""
# Few-shot conversation shown to the model before every real question:
# a system message followed by three worked examples in the
# Thought / Action / Observation protocol.
# The Observation strings are written exactly as the agent loop produces them:
# "Found results:\n" / "Changed the table to:\n" followed by
# DataFrame.to_string() of a frame whose index has been reset, so row labels
# always restart at 0.
color_messages = [
    {"role": "system", "content": "You are a helpful assistant that accurately solves reasoning questions about colored objects by acting as an agent who will perform Thoughts, where you will perform logical reasoning, and Actions, which perform functions and operations."},
    # --- Example 1: relative position lookup using FIND only ---
    {"role": "user", "content": prompt + '\n' + """Question: On the table, you see a bunch of objects arranged in a row: a purple paperclip, a pink stress ball, a brown keychain, a green scrunchiephone charger, and a mauve fidget spinner. What is the color of the object directly to the right of the stress ball?"""},
    {"role": "assistant", "content": """Thought: First, I need to create a table containing the colors of the objects, the names of the objects, and their positions on the table.
Action: CREATE_START[{"Color": ["Purple", "Pink", "Brown", "Green", "Mauve"], "Name": ["Paperclip", "Stress Ball", "Keychain", "Scrunchiephone Charger", "Fidget Spinner"], "Position": [1, 2, 3, 4, 5]}]CREATE_END"""},
    {"role": "user", "content": "Observation: Created table"},
    {"role": "assistant", "content": """Thought: Now I first need to find which position the Stress Ball is in. I can do this by finding the entry with that name. I will use the FIND action to avoid changing the table.
Action: FIND_START[Name == 'Stress Ball']FIND_END"""},
    {"role": "user", "content": """Observation: Found results:
  Color         Name  Position
0  Pink  Stress Ball         2"""},
    {"role": "assistant", "content": """Thought: Therefore, the position of the Stress Ball is 2. Thus, the position of the object to the right of it is 3. To get the color, I can find the entry with that position. I will use the FIND action to avoid changing the table..
Action: FIND_START[Position == 3]FIND_END"""},
    {"role": "user", "content": """Observation: Found results:
   Color      Name  Position
0  Brown  Keychain         3"""},
    {"role": "assistant", "content": """Thought: Thus, the color is Brown.
Action: FINISH_START[Brown]FINISH_END"""},
    # --- Example 2: counting after removals using FILTER and SUM ---
    {"role": "user", "content": """Question: There are 2 green apples, 3 red phones, 1 green phone, 8 green cups, 1 pink phone, and 2 red cups. If I remove all the green items, how many phones are left?"""},
    {"role": "assistant", "content": """Thought: First, I need to create a table containing the colors of the objects, the names of the objects, and how many times they appear.
Action: CREATE_START[{"Color": ["green", "red", "green", "green", "pink", "red"], "Object": ["apple", "phone", "phone", "cup", "phone", "cup"], "Count": [2, 3, 1, 8, 1, 2]}]CREATE_END"""},
    {"role": "user", "content": """Observation: Created table"""},
    {"role": "assistant", "content": """Thought: I first need to remove all the green items, so I will use the FILTER action.
Action: FILTER_START[Color != "green"]FILTER_END"""},
    {"role": "user", "content": """Observation: Changed the table to:
  Color Object  Count
0   red  phone      3
1  pink  phone      1
2   red    cup      2"""},
    {"role": "assistant", "content": """Thought: Now to find the number of phones, I first remove all items that aren't phones. To do this, I will use the FILTER action.
Action: FILTER_START[Object == "phone"]FILTER_END"""},
    {"role": "user", "content": """Observation: Changed the table to:
  Color Object  Count
0   red  phone      3
1  pink  phone      1"""},
    {"role": "assistant", "content": """Thought: Finally, to get the number of phones left, I take the sum of the Count column.
Action: SUM_START[Count]SUM_END"""},
    {"role": "user", "content": """Observation: 4"""},
    {"role": "assistant", "content": """Thought: Therefore, there are 4 phones left.
Action: FINISH_START[4]FINISH_END"""},
    # --- Example 3: distance reasoning using FIND and GET_VALUE ---
    {"role": "user", "content": """Question: There is a cyan phone, a green apple, a red cup, and a grey lamp on the table in that order. What is the color of the item furthest from the cup?"""},
    {"role": "assistant", "content": """Thought: I first need to create the table.
Action: CREATE_START[{"Color": ["Cyan", "Green", "Red", "Grey"], "Object": ["Phone", "Apple", "Cup", "Lamp"], "Position": [1, 2, 3, 4]}]CREATE_END"""},
    {"role": "user", "content": """Observation: Created table"""},
    {"role": "assistant", "content": """Thought: I first need to find the position of the cup.
Action: FIND_START[Object == "Cup"]FIND_END"""},
    {"role": "user", "content": """Observation: Found results:
  Color Object  Position
0   Red    Cup         3"""},
    {"role": "assistant", "content": """Thought: So the position of the cup is 3. The positions at the endpoints are 1 and 4. Now 4-3=1, while 3-1=2 and since 2>1, the item furthest from the cup is in position 1. I therefore need to find the color of the item in position 1.
Action: GET_VALUE_START[0;Color]GET_VALUE_END"""},
    {"role": "user", "content": """Observation: Cyan"""},
    {"role": "assistant", "content": """Thought: So the color of the object furthest from the cup is cyan.
Action: FINISH_START[Cyan]FINISH_END"""}
]
# Echo the few-shot conversation for visual inspection: the upper-cased role,
# then the message text, then one blank line as a separator.
for message in color_messages:
    speaker, text = message["role"].upper(), message["content"]
    print(speaker, text, "", sep="\n")

SYSTEM
You are a helpful assistant that accurately solves reasoning questions about colored objects by acting as an agent who will perform Thoughts, where you will perform logical reasoning, and Actions, which perform functions and operations.

USER
Whenever you synthesize information and perform reasoning, it is a Thought. When you need to perform an operation on a table, it is an Action. There are 7 types of actions: 
(1) CREATE_START[data]CREATE_END: CREATE takes a data definition of a table which will be used to generate a pandas dataframe, so make sure that the input does not contain anything unnecessary for the pandas dataframe input generation. Write the data in json format.
(2) GET_VALUE_START[row_number;column]GET_VALUE_END: GET_VALUE takes the row_number, e.g., 0 is the 1st row of the table, and the name of a column, e.g., age, and returns the value of the cell, e.g., the age in the first row of the table.
(3) FIND_START[query]FIND_END: FIND takes a query, similarly to COUNT,

In [5]:
# Model client used for every Thought/Action turn of the agent loop.
interface = pal.interface.ProgramInterface(
  model='gpt-4o-mini',
  stop='\n\n\n', # stop generation str for Codex API
  get_answer_expr='solution()' # python expression evaluated after generated code to obtain answer 
)


# --- Dataset loading and question sampling ---
max_count = 100      # upper bound on the number of questions to evaluate
sample_seed = None   # set to an int to draw the same questions on every run
counter = 0
responses = []       # one record per evaluated question, filled by the agent loop

# JSON is UTF-8 by specification; do not depend on the platform default encoding.
with open('datasets/reasoning_about_colored_objects.json', 'r', encoding='utf-8') as json_file:
    json_list = json.load(json_file)["examples"]

# Never request more questions than the dataset holds: random sampling without
# replacement raises ValueError when the sample is larger than the population.
sample_size = min(max_count, len(json_list))
# A dedicated generator keeps the draw independent of the global random state;
# with sample_seed=None it is seeded from system entropy, i.e. a fresh sample
# on every run.
jsonSamples = random.Random(sample_seed).sample(range(len(json_list)), sample_size)
print(jsonSamples)
# Fixed index lists (presumably from earlier runs); uncomment one to evaluate
# exactly those questions instead of a random sample.
#jsonSamples = [819, 1000, 699, 749, 580, 366, 1275, 203, 492, 773]
#jsonSamples = [269, 436, 836, 1291, 1283, 712, 391, 474, 1164, 783]
#jsonSamples = [1719, 612, 1997, 744, 1984, 1693, 1734, 767, 359, 1320]
def _parse_action(output):
    """Return ``(name, argument)`` for the first action in a model reply.

    Only the text after the first "Action: " label is searched; when the
    label is missing the whole reply is searched instead. The action whose
    ``NAME_START[`` marker occurs earliest is selected, so an action name
    that merely appears inside another action's argument cannot change which
    action runs. Returns ``(None, None)`` when no complete
    ``NAME_START[...]NAME_END`` pair is present.
    """
    label = "Action: "
    label_pos = output.find(label)
    action_text = output if label_pos == -1 else output[label_pos + len(label):]

    first = None  # (offset, name) of the earliest opening marker found so far
    for name in ("FINISH", "CREATE", "COUNT", "FIND", "FILTER", "GET_VALUE", "ADD", "SUM"):
        start = action_text.find(name + "_START[")
        if start != -1 and (first is None or start < first[0]):
            first = (start, name)
    if first is None:
        return None, None

    start, name = first
    arg_start = start + len(name) + len("_START[")
    # Look for the closing marker only after the opening one.
    arg_end = action_text.find("]" + name + "_END", arg_start)
    if arg_end == -1:
        return None, None
    return name, action_text[arg_start:arg_end]


def _run_table_action(name, argument, table):
    """Execute one table action and return ``(observation, table)``.

    ``table`` is the current DataFrame, or None before a successful CREATE.
    The returned table is a new object only when the action changes it
    (CREATE, FILTER, ADD). Every failure is reported through the observation
    text so that the model can correct itself; nothing is raised.
    """
    if name == "CREATE":
        try:
            table = pd.DataFrame(json.loads(argument))
            return "Created table", table
        except Exception as e:
            return f"That was not a valid input. This was the exception: {e}. Try again. ", table

    if name in ("COUNT", "FIND", "FILTER"):
        # NOTE(security): DataFrame.query evaluates an expression written by
        # the model. Acceptable for an offline experiment; do not expose this
        # path to untrusted input.
        try:
            matches = table.query(argument).reset_index(drop=True)
            if name == "COUNT":
                return str(len(matches)), table
            if name == "FIND":
                # FIND only reports the matching rows; the table is unchanged.
                return "Found results:\n" + matches.to_string(), table
            return "Changed the table to:\n" + matches.to_string(), matches
        except Exception as e:
            return f"That was not a valid query. This was the exception: {e}. It is possible that (1) The table does not exist yet. (2) The query is invalid.", table

    if name == "GET_VALUE":
        try:
            # Parsing happens inside the try block so that a malformed
            # "row;column" argument becomes an observation instead of
            # aborting the whole evaluation run.
            parts = argument.split(';')
            return str(table[parts[1]][int(parts[0])]), table
        except Exception as e:
            return f"That was not a valid input. This was the exception: {e}. It is possible that (1) The table does not exist yet. (2) The column name is wrong.", table

    if name == "ADD":
        try:
            new_row = pd.Series(json.loads(argument))
            table = pd.concat([table, pd.DataFrame([new_row])], ignore_index=True)
            return "Added row.", table
        except Exception as e:
            return f"That was not a valid input. This was the exception: {e}. Try again.", table

    if name == "SUM":
        if table is None:
            return "The table does not exist yet. Create it with CREATE first.", table
        if argument not in table.columns:
            return f"Column '{argument}' does not exist in the table.", table
        try:
            return str(table[argument].sum()), table
        except Exception as e:
            return f"That was not a valid input. This was the exception: {e}. Try again.", table

    return "That was not a valid action. Please input a valid one.", table


# Run the Thought/Action/Observation loop once per sampled question.
for json_pos in tqdm(jsonSamples, desc="Going through Colored Object questions"):
    result = json_list[json_pos]
    question = result["input"]
    target = result["target_scores"]
    curResponse = {"question": question, "target": target}
    # Start from a copy of the few-shot conversation, then add the question.
    messages = list(color_messages)
    messages.append({"role": "user", "content": prompt + '\n' + "Question: " + question})
    curTable = None
    maxIters = 20
    for numIters in range(maxIters):
        gens = interface.generate_history(messages=messages, max_tokens=1024)
        output = gens[0]
        messages.append({"role": "assistant", "content": output})
        actionName, actionArg = _parse_action(output)
        if actionName == "FINISH":
            # Normalise the answer so it can be matched against the target keys.
            curResponse["received"] = actionArg.strip().lower()
            break
        if actionName is None:
            evaled = "That was not a valid action. Please input a valid one."
        else:
            evaled, curTable = _run_table_action(actionName, actionArg, curTable)
        messages.append({"role": "user", "content": "Observation: " + evaled})
    else:
        # The model never issued FINISH within maxIters turns.
        print(json_pos)
        print("Maximum number of iterations reached")
        print("##################################\n")
        curResponse["received"] = -987654321  # sentinel: no answer produced
    curResponse["messages"] = messages
    responses.append(curResponse)
# Score the run: a response is correct when its recorded answer is a key of
# the question's target scores and that key's score is 1.
# To inspect a miss, print response["received"], response["target"] and the
# transcript tail response["messages"][len(color_messages):].
numCorrect = 0
for response in responses:
    if "received" not in response:
        continue
    scores = response["target"]
    answer = response["received"]
    if answer in scores and scores[answer] == 1:
        numCorrect += 1
print(f"Number correct = {numCorrect}")

[500, 1872, 593, 658, 852, 1300, 130, 1554, 1293, 924, 923, 284, 1858, 1187, 1745, 1889, 1388, 942, 1825, 570, 1154, 802, 415, 361, 1733, 88, 782, 1522, 102, 1195, 1987, 13, 1891, 1869, 512, 1447, 1221, 73, 1312, 1203, 454, 1120, 1442, 1375, 872, 848, 1399, 1614, 1567, 385, 1180, 624, 476, 464, 822, 655, 66, 503, 406, 182, 203, 1517, 417, 147, 1572, 1925, 855, 172, 1555, 1011, 108, 1756, 1428, 1603, 1140, 1496, 1766, 1188, 432, 819, 301, 1881, 1659, 1647, 785, 1008, 820, 1640, 1270, 1119, 1507, 727, 63, 1576, 1511, 563, 1333, 1845, 561, 91]


Going through Colored Object questions:   0%|          | 0/100 [00:00<?, ?it/s]

AttributeError: module 'openai' has no attribute 'error'

In [14]:
# Re-count correct answers over the collected responses and report the total
# alongside the number of responses.
numCorrect = 0
for response in responses:
    # Use .get so a record without a "received" answer is counted as wrong
    # instead of raising KeyError, matching the guarded check used when the
    # run is scored in the evaluation cell.
    received = response.get("received")
    if received in response["target"] and response["target"][received] == 1:
        numCorrect += 1
print(numCorrect)
print(len(responses))

0
0


In [19]:
# Persist every collected response record (question, target, received answer
# and full message transcript) as a single JSON document.
results_filename = 'coloredobject-prompting-01-results-gpt-4o-mini.json'
with open(results_filename, 'w') as results_file:
    json.dump(responses, results_file)