In [None]:
# ____ run every question in the dataset ____
import pandas as pd
import numpy as np
import os


In [None]:
# Add empty result columns to the `dataset` dataframe: the agent's intermediate
# steps, answer and score, plus the plain GPT-4 answer and score.
# Each column starts out as the empty string and is filled in row by row later.
for result_column in (
    'agent_intermediate_steps',
    'agent_answer',
    'agents_score',
    'gpt-4_answer',
    'gpt-4_score',
):
    dataset[result_column] = ''

In [None]:
# ____ get openai api key ____
# Path: C:\Users\cayde\Desktop\Grad_School_stuff\data-534_expo_proj\gpt-4-stuff\openai_key.txt
# Open the file and read in the key.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere until it is replaced by a configurable location.
with open(r'C:\Users\cayde\Desktop\Grad_School_stuff\data-534_expo_proj\gpt-4-stuff\openai_key.txt', 'r') as f:
    # Strip surrounding whitespace: a key file saved by a text editor normally
    # ends with a newline, which must not become part of the credential.
    openai_api_key = f.read().strip()


In [None]:
# ____ init agent ____
# init langchain agent with gpt-4
from langchain import LLMChain, PromptTemplate, LLMMathChain
from langchain.llms import OpenAI
from langchain.agents import load_tools, initialize_agent, ZeroShotAgent, Tool, AgentExecutor
from langchain.chat_models import ChatOpenAI
from langchain.utilities import PythonREPL
from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper
from langchain.chains import ConversationChain


# GPT-4 chat model used by the agent (temperature=0 requested).
llm = ChatOpenAI(model='gpt-4',temperature=0, openai_api_key=openai_api_key)
# SECURITY: the Wolfram Alpha app id used to be hardcoded only. It can now be
# supplied through the WOLFRAM_ALPHA_APPID environment variable; the literal
# is kept purely as a backward-compatible fallback and should be rotated and
# removed, since it has been committed in plain text.
wolfram = WolframAlphaAPIWrapper(
    wolfram_alpha_appid=os.environ.get('WOLFRAM_ALPHA_APPID', 'LXATGL-RJ6P2UWYT4')
)
python_repl = PythonREPL()

# ____ init tools ____
# Separate completion model (temperature=0) that backs the Calculator tool.
llm1 = OpenAI(temperature=0, openai_api_key=openai_api_key)
llm_math_chain = LLMMathChain(llm=llm1, verbose=True)

# One (name, callable, description) triple per tool, in the order the tools
# are handed to the agent.
# NOTE(review): going by its description, python_repl executes whatever
# command the model sends it - confirm this is acceptable where this runs.
_tool_specs = [
    (
        "Calculator",
        llm_math_chain.run,
        "useful for when you need to answer simple questions about math",
    ),
    (
        "python_repl",
        python_repl.run,
        "A Python shell. Use this to execute python commands. Input should be a valid python command. If you want to see the output of a value, you should print it out with `print(...)`.",
    ),
    (
        "wolfram_alpha",
        wolfram.run,
        "A tool that can answer questions about math, science, and more. Input should be a question. For example, `What is the derivative of x^2?`",
    ),
]
tools = [
    Tool(name=tool_name, func=tool_func, description=tool_description)
    for tool_name, tool_func, tool_description in _tool_specs
]

# ____ init prompt ____
# `prefix` and `suffix` are handed to ZeroShotAgent.create_prompt together with
# the tool list. The suffix carries the two template variables declared in
# `input_variables`: {input} and {agent_scratchpad}.
prefix = (
    "You are taking a test. Answer the math questions to the best of your ability. "
    "Equations are written in latex. You have access to the following tools:"
)
# The misspelling "Remeber" is kept as-is so the text sent to the model does
# not change.
suffix = (
    "Begin! Remeber answer the math questions to the best of your ability. "
    "Equations are written in latex.\n"
    "\n"
    "Question: {input}\n"
    "{agent_scratchpad}"
)

prompt = ZeroShotAgent.create_prompt(
    tools,
    prefix=prefix,
    suffix=suffix,
    input_variables=["input", "agent_scratchpad"],
)

# ____ run agent ____
# Chain that pairs the GPT-4 chat model with the agent prompt.
llm_chain = LLMChain(llm=llm, prompt=prompt)

# The agent is only allowed to call tools by these names.
tool_names = [t.name for t in tools]
agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names)

intermediate_steps = []
# Executor settings: verbose output, intermediate steps included in the
# result, and at most 7 iterations per question.
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    verbose=True,
    return_intermediate_steps=True,
    max_iterations=7,
)

In [None]:
# ____ init gpt-4 ____
import openai

def ask_gpt4_student(prompt):
    """Ask GPT-4 to answer a question as a test-taker and return its reply.

    Args:
        prompt: The question text; sent as the user message.

    Returns:
        The ``content`` of the first choice in the chat completion response.

    NOTE(review): no API key is passed here, so the ``openai`` module must have
    been configured elsewhere (e.g. ``openai.api_key`` or an environment
    variable) - confirm.
    """
    system_message = {
        "role": "system",
        "content": """You are taking a test. Answer the math questions to the best of your ability. Equations are written in latex.""",
    }
    user_message = {"role": "user", "content": prompt}

    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[system_message, user_message],
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

def ask_gpt4_grader(question, student_answer, actual_answer):
    """Ask GPT-4 to grade a student's answer against the reference answer.

    Args:
        question: The question the student was asked.
        student_answer: The student's work and answer.
        actual_answer: The reference answer to the question.

    Returns:
        The ``content`` of the first choice in the chat completion response.
        The system message instructs the model to start it with CORRECT or
        INCORRECT followed by a brief explanation.

    Non-string arguments (for example a NaN read from an empty dataframe cell)
    are converted with ``str()`` instead of raising ``TypeError``.

    NOTE(review): no API key is passed here, so the ``openai`` module must have
    been configured elsewhere (e.g. ``openai.api_key`` or an environment
    variable) - confirm.
    """
    # f-string formatting coerces every field to text, so one malformed cell
    # cannot abort the grading call before it is even sent.
    prompt = (
        f"question: {question}"
        f"\nstudent_answer: {student_answer}"
        f"\nactual_answer: {actual_answer}"
    )

    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[
            {
                "role": "system",
                "content": """You are evaluating a student's work. you are provided the question the student was asked, and the student's work and answer, as well as the actual answer to the question. You are to evaluate the student's work and determine if the student was CORRECT or INCORRECT. Start each response with either CORRECT or INCORRECT explain briefly why the student is right or wrong. Equations are written in latex. """,
            },
            {
                "role": "user",
                "content": prompt,
            },
        ],
    )
    assistant_reply = response["choices"][0]["message"]["content"]
    return assistant_reply

In [None]:
# ___ add a retry wrapper around the API calls ___

import time
import pandas as pd
import os

# Total number of attempts retry_on_failure makes before giving up.
MAX_RETRIES = 5


def retry_on_failure(function, *args, **kwargs):
    """Call ``function(*args, **kwargs)``, retrying with exponential backoff.

    Up to ``MAX_RETRIES`` attempts are made. After a failed attempt that is not
    the last one, a message is printed and the call sleeps for ``2 ** attempt``
    seconds (1, 2, 4, ...) before trying again.

    Args:
        function: The callable to invoke.
        *args: Positional arguments forwarded to ``function``.
        **kwargs: Keyword arguments forwarded to ``function``.

    Returns:
        Whatever ``function`` returns on its first successful attempt.

    Raises:
        Exception: The exception from the final attempt, re-raised once the
            retries are used up.
    """
    total_attempts = MAX_RETRIES
    final_attempt = total_attempts - 1
    attempt = 0
    while attempt < total_attempts:
        try:
            return function(*args, **kwargs)
        except Exception as e:
            if attempt >= final_attempt:
                # Out of retries: report and hand the error to the caller.
                print(f"Error: {e}. No more retries.")
                raise e
            sleep_seconds = 2 ** attempt  # exponential backoff
            print(f"Error: {e}. Retrying in {sleep_seconds} seconds...")
            time.sleep(sleep_seconds)
        attempt += 1

# Checkpoint file: rewritten after every question so an interrupted run keeps
# the rows that were already processed.
output_path = 'output/batch1_Q_89_120.csv'
# Make sure the target directory exists before the first save.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Iterate over the actual index labels so that .loc also works when the
# dataframe does not carry a default 0..n-1 index.
for i in dataset.index:
    problem = dataset.loc[i, 'problem']
    solution = dataset.loc[i, 'solution']

    # Reset per row so a failed agent run never inherits the previous row's steps.
    intermediate_steps = ''
    try:
        response = retry_on_failure(agent_executor, {'input': problem})
        intermediate_steps = str(response["intermediate_steps"])
        agent_answer = str(response["output"])
        # The grader sees the agent's intermediate steps followed by its final
        # answer. (This previously referenced an undefined name, so every agent
        # run fell through to the except branch.)
        agent_scratchpad = intermediate_steps + agent_answer
        agents_score = retry_on_failure(ask_gpt4_grader, problem, agent_scratchpad, solution)
    except Exception as e:
        # Best effort: record the failure as an incorrect answer and carry on
        # with the remaining questions.
        agent_answer = 'failed to answer'
        agents_score = 'INCORRECT'
        print(f"Error with agent: {e}")

    try:
        gpt_4_answer = retry_on_failure(ask_gpt4_student, problem)
        gpt_4_score = retry_on_failure(ask_gpt4_grader, problem, gpt_4_answer, solution)
    except Exception as e:
        gpt_4_answer = 'failed to answer'
        gpt_4_score = 'INCORRECT'
        print(f"Error with GPT-4: {e}")

    dataset.loc[i, 'agent_intermediate_steps'] = intermediate_steps
    dataset.loc[i, 'agent_answer'] = agent_answer
    dataset.loc[i, 'agents_score'] = agents_score
    dataset.loc[i, 'gpt-4_answer'] = gpt_4_answer
    dataset.loc[i, 'gpt-4_score'] = gpt_4_score

    dataset.to_csv(output_path, index=False)