# AI Performance Assessment

In [1]:
import sys
sys.path.append("/Users/danfinkel/github/mercury-be/") 

%load_ext autoreload
%autoreload 2

## Imports

In [14]:
from sympy import use
from python.tools.turbo4 import Turbo4
from python.tools.teachable import Teachable_Turbo4
from python.tools.llmtypes import Chat, TurboTool
from python.tools import rand
from python.tools.instruments import AgentInstruments
from python.tools.llm import add_cap_ref
from python.tools.llm import getTableDefs
from python.tools.helpers import run_python


from typing import List, Callable
import os
import argparse
import dotenv
import time
import pandas as pd

## Configuration

In [3]:
# load .env vars
# tmp = os.getcwd()
# os.chdir("/Users/danfinkel/github/mercury-be/")
# print(os.getcwd())
dotenv.load_dotenv("/Users/danfinkel/github/mercury-be/")
# os.chdir(tmp)

# check to make sure the important env vars are set
assert os.environ.get("RENDER_PG_URL"), "POSTGRES_CONNECTION_URL not found in .env file"
assert os.environ.get("RENDER_PG_SCHEMA"), "DATABASE_SCHEMA not found in .env file"

# bring env variables into memory
DB_URL = os.environ.get("RENDER_PG_URL")
DATABASE_SCHEMA = os.environ.get("RENDER_PG_SCHEMA")
POSTGRES_TABLE_DEFINITION_CAP_REF = "TABLE_DEFINITIONS"

In [4]:
# get table definitions for tables to be used for test
table_definitions = getTableDefs(DB_URL, DATABASE_SCHEMA)

In [5]:
custom_function_tool_config = {
    "type": "function",
    "function": {
        "name": "run_python",
        "description": "This is a function that executes a python script in a local environment. The variable pythonscript is a string that will be executed as a python script.",
        "parameters": {
            "type": "object",
            "properties": {"pythonScript": {"type": "string"}},
        },
    },
}

In [17]:
def runAI(raw_prompt: str, ii: int = 0) -> str:
    
    assistant_name = "Turbo4"
    assistant = Turbo4()
    
    ai_tools = [
        TurboTool("run_python", custom_function_tool_config, run_python),
    ]

    prompt = add_cap_ref(
    "", # type: ignore
    f"Write a python script that will print an answer the QUESTION. The script should write the answer to a csv file called ANSWER_{str(ii)}.csv",
    "QUESTION:",
    raw_prompt
    )

    prompt = add_cap_ref(
        prompt,  # type: ignore
        f"\n\nThe data you need to execute the task can be found in a postgres database with {POSTGRES_TABLE_DEFINITION_CAP_REF} described below.", 
        POSTGRES_TABLE_DEFINITION_CAP_REF, 
        table_definitions # type: ignore
    )    

    prompt += '\n\n Finally, to connect to the postgres database with the datasets you need to answer this QUESTION you can use the following environmental variables:\nhost: RENDER_PG_HOST\ndatabase: RENDER_PG_NAME\nusername: RENDER_PG_USER\npassword: RENDER_PG_PASSWORD. You should assume these variables are in the environment of the python script and can be accessed with the os library.'

    print('Starting the assistant...')
    assistant, status_msg = assistant.get_or_create_assistant(assistant_name)

    print('Setting the instructions...')
    assistant, instruct_msg = assistant.set_instructions("You are an elite python developer that specializes in adtech. You generate the most concise and performant python scripts.") # type: ignore

    print('Equipping the assistant...')
    assistant = assistant.equip_tools(ai_tools, equip_on_assistant=False) # type: ignore

    print('Creating the thread...')
    assistant, thread_id = assistant.make_thread()

    print('Adding the prompt...')
    assistant, prompt_msg = assistant.add_message(prompt)

    print('Executing the thread...')
    assistant, new_msgs = assistant.run_thread()

    print('Tee up the AI to run the code...')
    assistant, next_step_msg = assistant.add_message("use the run_python function to run the python you have just generated.")

    print('Run the python code and generate the file...')
    assistant, new_msgs = assistant.run_thread(toolbox=[ai_tools[0].name]) # this is a function that executes a string of python passed into it

    return 'finished'


In [23]:
# from tqdm import tqdm
# first prompt: What is the total reach of the campaign?
# second prompt: How many users saw an ad?
# third prompt: What is the total reach of the campaign?\n Here is a hint you can use to help answer the QUESTION. The total reach of an advertising campaign is determined by counting the number of distinct users who saw an ad. 

# first prompt: Please report daily campaign reach where reach for a given day is defined to be total number of users who were exposed in the previous 7 day window. Perform the calculation for each day from August 1 2023 to September 1 2023
# second prompt: Please calculate daily campaign reach. Use a 7 day lookback window when determining the reach for a given day. Perform the calculation for each day from August 1 2023 to September 1 2023
import numpy as np
import pandas as pd

results = []
successfulRuns = 0
badRuns = 0
for ii in np.arange(20):
    print(f'Starting iteration {str(ii)}...')
    try:
        aiRun = runAI('''Please calculate daily campaign reach. Use a 7 day lookback window when determining the reach for a given day. Perform the calculation for each day from August 1 2023 to September 1 2023''', ii=ii)
        if aiRun == 'finished':
            successfulRuns += 1
            print('Success!')
            results.append(pd.read_csv(f'ANSWER_{str(ii)}.csv'))
        else:
            print('Failure!')
            badRuns += 1
    except:
        print('Failure!')
        badRuns += 1


Starting iteration 0...
Starting the assistant...
get_or_create_assistant(Turbo4, gpt-4-1106-preview)
Setting the instructions...
set_instructions()
Equipping the assistant...
equip_tools([TurboTool(name='run_python', config={'type': 'function', 'function': {'name': 'run_python', 'description': 'This is a function that executes a python script in a local environment. The variable pythonscript is a string that will be executed as a python script.', 'parameters': {'type': 'object', 'properties': {'pythonScript': {'type': 'string'}}}}}, function=<function run_python at 0x10cf7de50>)], False)
Creating the thread...
make_thread()
Adding the prompt...
add_message( Write a python script that will print an answer the QUESTION. The script should write the answer to a csv file called ANSWER_0.csv

QUESTION:

Please report daily campaign reach where reach for a given day is defined to be total number of users who were exposed in the previous 7 day window. Perform the calculation for each day from

Traceback (most recent call last):
  File "/Users/danfinkel/github/mercury-be/python/notebooks/./pythonscript/runpython.py", line 56, in <module>
    total_reach = sum(row[1] for row in result if row[0] < current_date)
  File "/Users/danfinkel/github/mercury-be/python/notebooks/./pythonscript/runpython.py", line 56, in <genexpr>
    total_reach = sum(row[1] for row in result if row[0] < current_date)
TypeError: can't compare datetime.datetime to datetime.date


Success!
Starting iteration 11...
Starting the assistant...
get_or_create_assistant(Turbo4, gpt-4-1106-preview)
Setting the instructions...
set_instructions()
Equipping the assistant...
equip_tools([TurboTool(name='run_python', config={'type': 'function', 'function': {'name': 'run_python', 'description': 'This is a function that executes a python script in a local environment. The variable pythonscript is a string that will be executed as a python script.', 'parameters': {'type': 'object', 'properties': {'pythonScript': {'type': 'string'}}}}}, function=<function run_python at 0x10cf7de50>)], False)
Creating the thread...
make_thread()
Adding the prompt...
add_message( Write a python script that will print an answer the QUESTION. The script should write the answer to a csv file called ANSWER_11.csv

QUESTION:

Please report daily campaign reach where reach for a given day is defined to be total number of users who were exposed in the previous 7 day window. Perform the calculation for ea

  df_exposures = pd.read_sql_query(


Success!
Starting iteration 13...
Starting the assistant...
get_or_create_assistant(Turbo4, gpt-4-1106-preview)
Setting the instructions...
set_instructions()
Equipping the assistant...
equip_tools([TurboTool(name='run_python', config={'type': 'function', 'function': {'name': 'run_python', 'description': 'This is a function that executes a python script in a local environment. The variable pythonscript is a string that will be executed as a python script.', 'parameters': {'type': 'object', 'properties': {'pythonScript': {'type': 'string'}}}}}, function=<function run_python at 0x10cf7de50>)], False)
Creating the thread...
make_thread()
Adding the prompt...
add_message( Write a python script that will print an answer the QUESTION. The script should write the answer to a csv file called ANSWER_13.csv

QUESTION:

Please report daily campaign reach where reach for a given day is defined to be total number of users who were exposed in the previous 7 day window. Perform the calculation for ea

Traceback (most recent call last):
  File "/Users/danfinkel/github/mercury-be/python/notebooks/./pythonscript/runpython.py", line 49, in <module>
    reach_data = [
  File "/Users/danfinkel/github/mercury-be/python/notebooks/./pythonscript/runpython.py", line 52, in <listcomp>
    if start_date <= date <= end_date
TypeError: can't compare datetime.datetime to datetime.date


Success!
Starting iteration 18...
Starting the assistant...
get_or_create_assistant(Turbo4, gpt-4-1106-preview)
Setting the instructions...
set_instructions()
Equipping the assistant...
equip_tools([TurboTool(name='run_python', config={'type': 'function', 'function': {'name': 'run_python', 'description': 'This is a function that executes a python script in a local environment. The variable pythonscript is a string that will be executed as a python script.', 'parameters': {'type': 'object', 'properties': {'pythonScript': {'type': 'string'}}}}}, function=<function run_python at 0x10cf7de50>)], False)
Creating the thread...
make_thread()
Adding the prompt...
add_message( Write a python script that will print an answer the QUESTION. The script should write the answer to a csv file called ANSWER_18.csv

QUESTION:

Please report daily campaign reach where reach for a given day is defined to be total number of users who were exposed in the previous 7 day window. Perform the calculation for ea

In [24]:
""" 
Prompt: What is the total reach of the campaign?

Good answer:
SELECT SUM(weight) as total_reach
FROM campaign.universe uni
INNER JOIN campaign.exposures exp ON uni.userid = exp.userid

Fine answer:
SELECT COUNT(DISTINCT userid) FROM campaign.exposures


Bad answer:
SELECT SUM(weight) AS total_reach
FROM campaign.universe

V bad answer:
SELECT SUM(DISTINCT weight) FROM campaign.universe u JOIN campaign.exposures e ON u.userid = e.userid

RESULTS:
 - Successful runs: 20
 - Bad runs: 0

 - Correct answer: 4
 - Other answers: 25K - 11, 100K - 4, 1 - 1
 
"""
print(successfulRuns, badRuns)

19 1


In [None]:
""" 
Prompt: How many users saw an ad?

Good answer:
SELECT COUNT(DISTINCT userid) FROM campaign.exposures;


RESULTS:
 - Successful runs: 19
 - Bad runs: 1

 - Correct answer: 19
 - Other answers: 
 
"""
print(successfulRuns, badRuns)

In [None]:
"""
Prompt: What is the total reach of the campaign?\n Here is a hint you can use to help answer the QUESTION. The total reach of an advertising campaign is determined by counting the number of distinct users who saw an ad.
"""