In [1]:
# ____ hitting all questions in the dataset ____
import pandas as pd
import numpy as np
import os

# Destination CSV that the evaluation loop writes its results to.
#### CHANGE BEFORE RUNNING ####
output_path = 'output/batch10.csv'

# Load the sampled problems into a DataFrame.
df = pd.read_csv(filepath_or_buffer='random_sample.csv')

In [2]:
# Select the rows of df whose batch_number column equals 10; this subset is the
# `dataset` frame that the rest of the notebook works on.
dataset = df.loc[df['batch_number'].isin([10]), :]

# Show the column names carried by the selected batch.
print(dataset.columns)

Index(['Unnamed: 0', 'problem', 'solution', 'PK', 'agent_intermediate_steps',
       'agent_answer', 'agents_score', 'gpt-4_answer', 'gpt-4_score',
       'batch_number'],
      dtype='object')


In [3]:
# Discard the 'PK' and 'Unnamed: 0' columns from the working frame.
dataset = dataset.drop(columns=['PK', 'Unnamed: 0'])

In [4]:
# Blank out every result column (agent steps/answer/score and GPT-4 answer/score)
# so the evaluation loop starts from empty strings.
for result_column in (
    'agent_intermediate_steps',
    'agent_answer',
    'agents_score',
    'gpt-4_answer',
    'gpt-4_score',
):
    dataset[result_column] = ''

In [5]:
# # cut down dataset to 5 rows for testing
# dataset = dataset.head(5)

In [6]:
# Renumber the rows 0..n-1 and discard the old index (drop=True). The evaluation
# loop addresses rows with dataset.loc[i, ...] for i in range(len(dataset)), so the
# index labels must match those positions.
dataset = dataset.reset_index(drop=True)

In [7]:
# ____ get openai api key ____
# Read the OpenAI API key from a local text file.
# NOTE(review): the path is absolute and machine-specific; consider making it configurable.
with open(r'/Users/cindydunn/Desktop/Grad_School/gestalt/openai_key.txt', 'r') as f:
    # Strip surrounding whitespace: a trailing newline left in the file would
    # otherwise become part of the key string handed to the API clients.
    openai_api_key = f.read().strip()


In [8]:
# ____ init agent ____
# init langchain agent with gpt-4
from langchain import LLMChain, PromptTemplate, LLMMathChain
from langchain.llms import OpenAI
from langchain.agents import load_tools, initialize_agent, ZeroShotAgent, Tool, AgentExecutor
from langchain.chat_models import ChatOpenAI
from langchain.utilities import PythonREPL
from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
from langchain.chains import ConversationChain


# Chat model used by the agent and by the calculator chain below; built with
# model='gpt-4' and temperature=0, authenticated with the key read earlier.
llm = ChatOpenAI(model='gpt-4',temperature=0, openai_api_key=openai_api_key)
# NOTE(review): the Wolfram Alpha app id is hard-coded in the notebook, unlike the
# OpenAI key which is read from a file — consider loading it the same way.
wolfram = WolframAlphaAPIWrapper(wolfram_alpha_appid='LXATGL-RJ6P2UWYT4')
python_repl = PythonREPL()

# ____ init tools ____
# NOTE(review): llm1 is not referenced again in the visible cells (the math chain
# below is built on `llm`, not `llm1`) — confirm whether it is still needed.
llm1 = OpenAI(temperature=0, openai_api_key=openai_api_key)
llm_math_chain = LLMMathChain(llm=llm, verbose=True)
# The three tools offered to the agent: the calculator chain, a Python REPL and
# Wolfram Alpha. Each Tool pairs a name with the callable to run and a description.
tools = [
    Tool(
        name="Calculator",
        func=llm_math_chain.run,
        description="useful for when you need to answer simple questions about math"
    ),
    Tool(
        name="python_repl", 
        func=python_repl.run,
        description="A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`."
    ),
        Tool(
        name="wolfram_alpha",
        func=wolfram.run,
        description="A tool that can answer questions about math, science, and more. Input should be a question. For example, `What is the derivative of x^2?`"
    )
]

# ____ init prompt ____
# prefix and suffix are handed to ZeroShotAgent.create_prompt together with the tools;
# the suffix carries the {input} and {agent_scratchpad} placeholders that are
# declared in input_variables.
# NOTE(review): the suffix contains the typo "Remeber"; it is part of the prompt text
# sent to the model and is left untouched here.
prefix = """You are taking a test. Answer the math questions to the best of your ability. Equations are written in latex. You have access to the following tools:"""
suffix = """Begin! Remeber answer the math questions to the best of your ability. Equations are written in latex.

Question: {input}
{agent_scratchpad}"""

prompt = ZeroShotAgent.create_prompt(
    tools, 
    prefix=prefix, 
    suffix=suffix, 
    input_variables=["input", "agent_scratchpad"]
)

# ____ run agent ____
# Wire the prompt to the chat model, then build the agent restricted to the tool
# names defined above.
llm_chain = LLMChain(llm=llm, prompt=prompt)

tool_names = [tool.name for tool in tools]
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names)

# Initial value only; the evaluation loop overwrites this name with the stringified
# steps of each agent run.
intermediate_steps = []
# return_intermediate_steps=True is what lets the evaluation loop read
# response["intermediate_steps"]; max_iterations=7 presumably caps the number of
# agent steps per question — confirm against the langchain version in use.
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, return_intermediate_steps=True, max_iterations=7) # verbose=True



In [9]:
# ____ init gpt-4 ____
import openai

def ask_gpt4_student(prompt):
    """Ask GPT-4, acting as a test taker, to answer a single problem.

    Args:
        prompt: The problem text; it is sent unchanged as the user message.

    Returns:
        The message content of the first choice in the chat-completion response.
    """
    system_message = {
        "role": "system",
        "content": """You are taking a test. Answer the math questions to the best of your ability. Equations are written in latex.""",
    }
    user_message = {"role": "user", "content": prompt}

    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[system_message, user_message],
    )

    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

def ask_gpt4_grader(question, student_answer, actual_answer):
    """Ask GPT-4 to grade a student's work against the reference answer.

    Args:
        question: The problem the student was asked.
        student_answer: The student's work and final answer.
        actual_answer: The reference solution to compare against.

    Returns:
        The message content of the first choice in the chat-completion response.
        The system prompt instructs the model to start its reply with CORRECT or
        INCORRECT followed by a brief explanation.
    """
    # Use f-string formatting instead of `+` concatenation: the prompt is identical
    # for string inputs, but a non-string field (e.g. a NaN solution read from the
    # CSV) no longer raises TypeError before the grader is even called.
    prompt = f"question: {question}\nstudent_answer: {student_answer}\nactual_answer: {actual_answer}"

    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {
                "role": "system",
                "content": """You are evaluating a student's work. you are provided the question the student was asked, and the student's work and answer, as well as the actual answer to the question. You are to evaluate the student's work and determine if the student was CORRECT or INCORRECT. Start each response with either CORRECT or INCORRECT explain briefly why the student is right or wrong. Equations are written in latex. """,
            },
            {
                "role": "user",
                "content": prompt,
            },
        ],
    )
    assistant_reply = response["choices"][0]["message"]["content"]
    return assistant_reply

In [10]:
# ___ add a retry api call to script __

import time
import pandas as pd
import os

# Total number of attempts retry_on_failure makes before giving up.
MAX_RETRIES = 5


def retry_on_failure(function, *args, **kwargs):
    """Call ``function(*args, **kwargs)``, retrying with exponential backoff.

    Any ``Exception`` raised by the call triggers a retry after sleeping
    ``2 ** attempt`` seconds (1, 2, 4, ...). A message is printed for every
    failure.

    Args:
        function: The callable to invoke.
        *args: Positional arguments forwarded to ``function``.
        **kwargs: Keyword arguments forwarded to ``function``.

    Returns:
        Whatever ``function`` returns on its first successful call.

    Raises:
        Exception: The exception from the final attempt, once ``MAX_RETRIES``
            attempts have all failed.
    """
    for attempt in range(MAX_RETRIES):
        try:
            return function(*args, **kwargs)
        except Exception as e:
            # Guard clause: the last attempt reports and re-raises instead of sleeping.
            if attempt >= MAX_RETRIES - 1:
                print(f"retry_on_failure: {e}. No more retries.")
                raise e

            sleep_seconds = 2 ** attempt  # exponential backoff
            print(f"retry_on_failure: {e}. Retrying in {sleep_seconds} seconds...")
            time.sleep(sleep_seconds)

# Make sure the folder for the results file exists before the long run starts;
# otherwise the first to_csv call would fail only after the API calls for the
# first question have already been made.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# For every problem: run the tool-using agent and plain GPT-4, have GPT-4 grade
# both answers against the reference solution, and checkpoint the results to CSV.
total_questions = len(dataset)
for i in range(total_questions):
    problem = dataset.loc[i, 'problem']
    solution = dataset.loc[i, 'solution']

    # print what question we are on
    print(f"Question {i+1} of {total_questions}")

    # Reset per question: if the agent raises before producing steps, the row must
    # not inherit the intermediate steps recorded for the previous question.
    intermediate_steps = ''

    # print run agent
    print("Running agent...")
    try:
        response = agent_executor({'input':problem})
        intermediate_steps = str(response["intermediate_steps"])
        agent_answer = str(response["output"])
        print("Running agent grader...")
        # The grader sees the agent's full work: its steps followed by its final answer.
        agent_scratchpad = str(intermediate_steps + agent_answer)
        agents_score = retry_on_failure(ask_gpt4_grader, problem, agent_scratchpad, solution)
    except Exception as e:
        # Best effort: a failure of the agent (or of its grading) is recorded as an
        # incorrect answer so the run can continue with the next question.
        agent_answer = 'failed to answer'
        agents_score = 'INCORRECT'
        print(f"Error with agent: {e}")

    # print run gpt-4
    print("Running GPT-4...")
    try:
        gpt_4_answer = retry_on_failure(ask_gpt4_student, problem)
        print("Running GPT-4 grader...")
        gpt_4_score = retry_on_failure(ask_gpt4_grader, problem, gpt_4_answer, solution)
    except Exception as e:
        gpt_4_answer = 'failed to answer'
        gpt_4_score = 'INCORRECT'
        print(f"Error with GPT-4: {e}")

    # print saving results
    print("Saving results...")
    dataset.loc[i, 'agent_intermediate_steps'] = str(intermediate_steps)
    dataset.loc[i, 'agent_answer'] = str(agent_answer)
    dataset.loc[i, 'agents_score'] = agents_score
    dataset.loc[i, 'gpt-4_answer'] = gpt_4_answer
    dataset.loc[i, 'gpt-4_score'] = gpt_4_score

    # Rewrite the whole file after every question so partial progress survives a crash.
    dataset.to_csv(output_path, index=False)

Question 1 of 120
Running agent...
Running agent grader...
Running GPT-4...
Running GPT-4 grader...
Saving results...
Question 2 of 120
Running agent...


[1m> Entering new LLMMathChain chain...[0m
12 * 6 + 12 * 6 + 6 * 4 + 13 * 5

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised APIError: Bad gateway. {"error":{"code":502,"message":"Bad gateway.","param":null,"type":"cf_bad_gateway"}} 502 {'error': {'code': 502, 'message': 'Bad gateway.', 'param': None, 'type': 'cf_bad_gateway'}} {'Date': 'Sat, 03 Jun 2023 15:46:12 GMT', 'Content-Type': 'application/json', 'Content-Length': '84', 'Connection': 'keep-alive', 'X-Frame-Options': 'SAMEORIGIN', 'Referrer-Policy': 'same-origin', 'Cache-Control': 'private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0', 'Expires': 'Thu, 01 Jan 1970 00:00:01 GMT', 'Server': 'cloudflare', 'CF-RAY': '7d1909865cd732f8-JAX', 'alt-svc': 'h3=":443"; ma=86400'}.


[32;1m[1;3m```text
12 * 6 + 12 * 6 + 6 * 4 + 13 * 5
```
...numexpr.evaluate("12 * 6 + 12 * 6 + 6 * 4 + 13 * 5")...
[0m
Answer: [33;1m[1;3m233[0m
[1m> Finished chain.[0m


[1m> Entering new LLMMathChain chain...[0m
12 + 12 + 6 + 13[32;1m[1;3m```text
12 + 12 + 6 + 13
```
...numexpr.evaluate("12 + 12 + 6 + 13")...
[0m
Answer: [33;1m[1;3m43[0m
[1m> Finished chain.[0m


[1m> Entering new LLMMathChain chain...[0m
233 / 43[32;1m[1;3m```text
233 / 43
```
...numexpr.evaluate("233 / 43")...
[0m
Answer: [33;1m[1;3m5.4186046511627906[0m
[1m> Finished chain.[0m
Running agent grader...
Running GPT-4...
Running GPT-4 grader...
Saving results...
Question 3 of 120
Running agent...


[1m> Entering new LLMMathChain chain...[0m
1 - (1 - cos(x)^2)^2 - cos(x)^2 = 1/16[32;1m[1;3m```text
1 - (1 - cos(x)**2)**2 - cos(x)**2 - 1/16
```
...numexpr.evaluate("1 - (1 - cos(x)**2)**2 - cos(x)**2 - 1/16", {'x': 0.5})...
[0mError with agent: LLMMathChain._evaluate("
1 - (1 - cos(x)**2)*