# AI Performance Assessment

In [2]:
import sys
sys.path.append("/Users/danfinkel/github/mercury-be/") 

%load_ext autoreload
%autoreload 2

## Imports

In [3]:
from sympy import use
from python.tools.turbo4 import Turbo4
from python.tools.teachable import Teachable_Turbo4
from python.tools.llmtypes import Chat, TurboTool
from python.tools import rand
from python.tools.instruments import AgentInstruments
from python.tools.llm import add_cap_ref
from python.tools.llm import getTableDefs
from python.tools.helpers import run_python


from typing import List, Callable
import os
import argparse
import dotenv
import time
import pandas as pd

## Configuration

In [4]:
# load .env vars
tmp = os.getcwd()
os.chdir("/Users/danfinkel/github/mercury-be/")
print(os.getcwd())
os.environ.pop('OPENAI_API_KEY', None)
dotenv.load_dotenv()

# dotenv.load_dotenv("/Users/danfinkel/github/mercury-be/")
# print(os.getenv("OPENAI_API_KEY"))
os.chdir(tmp)

# check to make sure the important env vars are set
assert os.environ.get("RENDER_PG_URL"), "POSTGRES_CONNECTION_URL not found in .env file"
assert os.environ.get("RENDER_PG_SCHEMA"), "DATABASE_SCHEMA not found in .env file"

# bring env variables into memory
DB_URL = os.environ.get("RENDER_PG_URL")
DATABASE_SCHEMA = os.environ.get("RENDER_PG_SCHEMA")
POSTGRES_TABLE_DEFINITION_CAP_REF = "TABLE_DEFINITIONS"

/Users/danfinkel/github/mercury-be


In [12]:
# get table definitions for tables to be used for test
table_definitions = getTableDefs(DB_URL, DATABASE_SCHEMA)

In [13]:
custom_function_tool_config = {
    "type": "function",
    "function": {
        "name": "run_python",
        "description": "This is a function that executes a python script in a local environment. The variable pythonscript is a string that will be executed as a python script.",
        "parameters": {
            "type": "object",
            "properties": {"pythonScript": {"type": "string"}},
        },
    },
}

In [19]:
def runAI(raw_prompt: str, ii: int = 0) -> str:
    
    assistant_name = "Turbo4"
    assistant = Turbo4()
    
    ai_tools = [
        TurboTool("run_python", custom_function_tool_config, run_python),
    ]

    prompt = add_cap_ref(
    "", # type: ignore
    f"Write a python script that will print an answer the QUESTION. The script should write the answer to a csv file called ANSWER_{str(ii)}.csv",
    "QUESTION:",
    raw_prompt
    )

    prompt = add_cap_ref(
        prompt,  # type: ignore
        f"\n\nThe data you need to execute the task can be found in a postgres database with {POSTGRES_TABLE_DEFINITION_CAP_REF} described below.", 
        POSTGRES_TABLE_DEFINITION_CAP_REF, 
        table_definitions # type: ignore
    )    

    prompt += '\n\n Finally, to connect to the postgres database with the datasets you need to answer this QUESTION you can use the following environmental variables:\nhost: RENDER_PG_HOST\ndatabase: RENDER_PG_NAME\nusername: RENDER_PG_USER\npassword: RENDER_PG_PASSWORD. You should assume these variables are in the environment of the python script and can be accessed with the os library.'

    print('Starting the assistant...')
    assistant, status_msg = assistant.get_or_create_assistant(assistant_name)

    print('Setting the instructions...')
    assistant, instruct_msg = assistant.set_instructions("You are an elite python developer that specializes in adtech. You generate the most concise and performant python scripts.") # type: ignore

    print('Equipping the assistant...')
    assistant = assistant.equip_tools(ai_tools, equip_on_assistant=False) # type: ignore

    print('Creating the thread...')
    assistant, thread_id = assistant.make_thread()

    print('Adding the prompt...')
    assistant, prompt_msg = assistant.add_message(prompt)

    print('Executing the thread...')
    assistant, new_msgs = assistant.run_thread()

    return new_msgs[-1]

    # print('Tee up the AI to run the code...')
    # assistant, next_step_msg = assistant.add_message("use the run_python function to run the python you have just generated.")

    # print('Run the python code and generate the file...')
    # assistant, new_msgs = assistant.run_thread(toolbox=[ai_tools[0].name]) # this is a function that executes a string of python passed into it

    # return 'finished'


In [29]:
# from tqdm import tqdm
# first prompt: What is the total reach of the campaign?
# second prompt: How many users saw an ad?
# third prompt: What is the total reach of the campaign?\n Here is a hint you can use to help answer the QUESTION. The total reach of an advertising campaign is determined by counting the number of distinct users who saw an ad. 

# first prompt: Please report daily campaign reach where reach for a given day is defined to be total number of users who were exposed in the previous 7 day window. Perform the calculation for each day from August 1 2023 to September 1 2023
# second prompt: Please report daily campaign reach from August 1 2023 to September 1 2023 with a 7 day lookback window.
# third prompt: Please report daily campaign reach from August 1 2023 to September 1 2023 with a 7 day lookback window. \n\n TIPS:- TIP 1: The reach for a given day is defined to be count of distinct users who were exposed to an ad anytime during the previous 7 days.\n- TIP 2: The BETWEEN SQL function is inclusive of its bounds. That means that when using BETWEEN for a 7 day window calculation you should use 6 days in the calculation.\n
import numpy as np
import pandas as pd

results = []
successfulRuns = 0
badRuns = 0
for ii in np.arange(20):
    print(f'Starting iteration {str(ii)}...')
    try:
        aiRun = runAI('''Please report daily campaign reach from August 1 2023 to September 1 2023 with a 7 day lookback window.\n''', ii=ii)
        pscript = aiRun.message.split('```python\n')[1].split('```')[0] # type: ignore
        try:
            run_python(pscript)
        except:
            print('Script Failed')
        
        # if aiRun == 'finished':
        #     successfulRuns += 1
        #     print('Success!')
        #     results.append(pd.read_csv(f'ANSWER_{str(ii)}.csv'))
        # else:
        #     print('Failure!')
        #     badRuns += 1
    except:
        print('Failure!')
        badRuns += 1


Starting iteration 0...
Starting the assistant...
get_or_create_assistant(Turbo4, gpt-4-1106-preview)
Setting the instructions...
set_instructions()
Equipping the assistant...
equip_tools([TurboTool(name='run_python', config={'type': 'function', 'function': {'name': 'run_python', 'description': 'This is a function that executes a python script in a local environment. The variable pythonscript is a string that will be executed as a python script.', 'parameters': {'type': 'object', 'properties': {'pythonScript': {'type': 'string'}}}}}, function=<function run_python at 0x1148c4e50>)], False)
Creating the thread...
make_thread()
Adding the prompt...
add_message( Write a python script that will print an answer the QUESTION. The script should write the answer to a csv file called ANSWER_0.csv

QUESTION:

Please report daily campaign reach from August 1 2023 to September 1 2023 with a 7 day lookback window.
 

The data you need to execute the task can be found in a postgres database with TABL

  data = pd.read_sql_query(query, conn, params=(start_date, end_date))
Traceback (most recent call last):
  File "/Users/danfinkel/.pyenv/versions/3.9.13/envs/llm/lib/python3.9/site-packages/pandas/io/sql.py", line 2262, in execute
    cur.execute(sql, *args)
psycopg2.errors.FeatureNotSupported: DISTINCT is not implemented for window functions
LINE 3:                 COUNT(DISTINCT e.userid) OVER (ORDER BY e.ex...
                        ^


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/Users/danfinkel/github/mercury-be/python/notebooks/./pythonscript/runpython.py", line 35, in <module>
    data = pd.read_sql_query(query, conn, params=(start_date, end_date))
  File "/Users/danfinkel/.pyenv/versions/3.9.13/envs/llm/lib/python3.9/site-packages/pandas/io/sql.py", line 486, in read_sql_query
    return pandas_sql.read_query(
  File "/Users/danfinkel/.pyenv/versions/3.9.13/envs/llm/lib/python3.9/site-packages/pandas/io/sql.

Starting iteration 2...
Starting the assistant...
get_or_create_assistant(Turbo4, gpt-4-1106-preview)
Setting the instructions...
set_instructions()
Equipping the assistant...
equip_tools([TurboTool(name='run_python', config={'type': 'function', 'function': {'name': 'run_python', 'description': 'This is a function that executes a python script in a local environment. The variable pythonscript is a string that will be executed as a python script.', 'parameters': {'type': 'object', 'properties': {'pythonScript': {'type': 'string'}}}}}, function=<function run_python at 0x1148c4e50>)], False)
Creating the thread...
make_thread()
Adding the prompt...
add_message( Write a python script that will print an answer the QUESTION. The script should write the answer to a csv file called ANSWER_2.csv

QUESTION:

Please report daily campaign reach from August 1 2023 to September 1 2023 with a 7 day lookback window.
 

The data you need to execute the task can be found in a postgres database with TABL

  df = pd.read_sql_query(query, conn)


Starting iteration 8...
Starting the assistant...
get_or_create_assistant(Turbo4, gpt-4-1106-preview)
Setting the instructions...
set_instructions()
Equipping the assistant...
equip_tools([TurboTool(name='run_python', config={'type': 'function', 'function': {'name': 'run_python', 'description': 'This is a function that executes a python script in a local environment. The variable pythonscript is a string that will be executed as a python script.', 'parameters': {'type': 'object', 'properties': {'pythonScript': {'type': 'string'}}}}}, function=<function run_python at 0x1148c4e50>)], False)
Creating the thread...
make_thread()
Adding the prompt...
add_message( Write a python script that will print an answer the QUESTION. The script should write the answer to a csv file called ANSWER_8.csv

QUESTION:

Please report daily campaign reach from August 1 2023 to September 1 2023 with a 7 day lookback window.
 

The data you need to execute the task can be found in a postgres database with TABL

  File "/Users/danfinkel/github/mercury-be/python/notebooks/./pythonscript/runpython.py", line 27
    lookback_date = date - timedelta(days=lookend_date = date
                                                       ^
SyntaxError: invalid syntax


Failure!
Starting iteration 12...
Starting the assistant...
get_or_create_assistant(Turbo4, gpt-4-1106-preview)
Setting the instructions...
set_instructions()
Equipping the assistant...
equip_tools([TurboTool(name='run_python', config={'type': 'function', 'function': {'name': 'run_python', 'description': 'This is a function that executes a python script in a local environment. The variable pythonscript is a string that will be executed as a python script.', 'parameters': {'type': 'object', 'properties': {'pythonScript': {'type': 'string'}}}}}, function=<function run_python at 0x1148c4e50>)], False)
Creating the thread...
make_thread()
Adding the prompt...
add_message( Write a python script that will print an answer the QUESTION. The script should write the answer to a csv file called ANSWER_12.csv

QUESTION:

Please report daily campaign reach from August 1 2023 to September 1 2023 with a 7 day lookback window.
 

The data you need to execute the task can be found in a postgres databas

  df = pd.read_sql(query, conn, params=(start_date-timedelta(days=lookback_days), end_date))
  df = pd.read_sql(query, conn, params=(start_date-timedelta(days=lookback_days), end_date))


Starting iteration 13...
Starting the assistant...
get_or_create_assistant(Turbo4, gpt-4-1106-preview)
Setting the instructions...
set_instructions()
Equipping the assistant...
equip_tools([TurboTool(name='run_python', config={'type': 'function', 'function': {'name': 'run_python', 'description': 'This is a function that executes a python script in a local environment. The variable pythonscript is a string that will be executed as a python script.', 'parameters': {'type': 'object', 'properties': {'pythonScript': {'type': 'string'}}}}}, function=<function run_python at 0x1148c4e50>)], False)
Creating the thread...
make_thread()
Adding the prompt...
add_message( Write a python script that will print an answer the QUESTION. The script should write the answer to a csv file called ANSWER_13.csv

QUESTION:

Please report daily campaign reach from August 1 2023 to September 1 2023 with a 7 day lookback window.
 

The data you need to execute the task can be found in a postgres database with TA

Traceback (most recent call last):
  File "/Users/danfinkel/github/mercury-be/python/notebooks/./pythonscript/runpython.py", line 50, in <module>
    cur.execute(sql_query, {'start_date': lookback_start, 'end_date': lookback_end})
KeyError: 'lookback_start'


Starting iteration 15...
Starting the assistant...
get_or_create_assistant(Turbo4, gpt-4-1106-preview)
Setting the instructions...
set_instructions()
Equipping the assistant...
equip_tools([TurboTool(name='run_python', config={'type': 'function', 'function': {'name': 'run_python', 'description': 'This is a function that executes a python script in a local environment. The variable pythonscript is a string that will be executed as a python script.', 'parameters': {'type': 'object', 'properties': {'pythonScript': {'type': 'string'}}}}}, function=<function run_python at 0x1148c4e50>)], False)
Creating the thread...
make_thread()
Adding the prompt...
add_message( Write a python script that will print an answer the QUESTION. The script should write the answer to a csv file called ANSWER_15.csv

QUESTION:

Please report daily campaign reach from August 1 2023 to September 1 2023 with a 7 day lookback window.
 

The data you need to execute the task can be found in a postgres database with TA

In [26]:
pscript = aiRun.message.split('```python\n')[1].split('```')[0]


''

In [24]:
""" 
Prompt: What is the total reach of the campaign?

Good answer:
SELECT SUM(weight) as total_reach
FROM campaign.universe uni
INNER JOIN campaign.exposures exp ON uni.userid = exp.userid

Fine answer:
SELECT COUNT(DISTINCT userid) FROM campaign.exposures


Bad answer:
SELECT SUM(weight) AS total_reach
FROM campaign.universe

V bad answer:
SELECT SUM(DISTINCT weight) FROM campaign.universe u JOIN campaign.exposures e ON u.userid = e.userid

RESULTS:
 - Successful runs: 20
 - Bad runs: 0

 - Correct answer: 4
 - Other answers: 25K - 11, 100K - 4, 1 - 1
 
"""
print(successfulRuns, badRuns)

19 1


In [None]:
""" 
Prompt: How many users saw an ad?

Good answer:
SELECT COUNT(DISTINCT userid) FROM campaign.exposures;


RESULTS:
 - Successful runs: 19
 - Bad runs: 1

 - Correct answer: 19
 - Other answers: 
 
"""
print(successfulRuns, badRuns)

In [None]:
"""
Prompt: What is the total reach of the campaign?\n Here is a hint you can use to help answer the QUESTION. The total reach of an advertising campaign is determined by counting the number of distinct users who saw an ad.
"""

In [8]:
raw_prompt = '''Please report daily campaign reach from August 1 2023 to September 1 2023 with a 7 day lookback window. \n\n TIPS:- TIP 1: The reach for a given day is defined to be count of distinct users who were exposed to an ad anytime during the previous 7 days.\n- TIP 2: The BETWEEN SQL function is inclusive of its bounds. That means that when using BETWEEN for a 7 day window calculation you should use 6 days in the calculation.\n''',
assistant_name = "Turbo4"
assistant = Turbo4()
    
ai_tools = [
        TurboTool("run_python", custom_function_tool_config, run_python),
    ]

prompt = add_cap_ref(
    "", # type: ignore
    f"Write a python script that will print an answer the QUESTION. The script should write the answer to a csv file called ANSWER_{str(ii)}.csv",
    "QUESTION:",
    raw_prompt
    )

prompt = add_cap_ref(
        prompt,  # type: ignore
        f"\n\nThe data you need to execute the task can be found in a postgres database with {POSTGRES_TABLE_DEFINITION_CAP_REF} described below.", 
        POSTGRES_TABLE_DEFINITION_CAP_REF, 
        table_definitions # type: ignore
    )    

prompt += '\n\n Finally, to connect to the postgres database with the datasets you need to answer this QUESTION you can use the following environmental variables:\nhost: RENDER_PG_HOST\ndatabase: RENDER_PG_NAME\nusername: RENDER_PG_USER\npassword: RENDER_PG_PASSWORD. You should assume these variables are in the environment of the python script and can be accessed with the os library.'

print('Starting the assistant...')
assistant, status_msg = assistant.get_or_create_assistant(assistant_name)

Starting the assistant...
get_or_create_assistant(Turbo4, gpt-4-1106-preview)


AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-7B40K***************************************dFDZ. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

In [9]:
os.getenv("OPENAI_API_KEY")

'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'