In [1]:
# imports and api_keys
from langchain import OpenAI, LLMMathChain, SerpAPIWrapper, WolframAlphaAPIWrapper
from langchain.agents import initialize_agent, Tool
from langchain.chat_models import ChatOpenAI

# API keys are kept in plain-text files next to the project instead of being
# written into the notebook itself.
KEY_DIR = "C:\\Users\\cayde\\Desktop\\Grad_School_stuff\\data-534_expo_proj"


def _read_key(filename):
    """Return the text of ``filename`` inside ``KEY_DIR`` without surrounding whitespace.

    Text editors commonly end a file with a newline; stripping keeps that
    newline (or stray spaces) out of the credential string handed to the
    API clients.

    Args:
        filename: Name of the key file located directly in ``KEY_DIR``.

    Returns:
        The stripped file contents as a string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(KEY_DIR + "\\" + filename, "r") as f:
        return f.read().strip()


# OpenAI API key (used by both the chat and the completion model).
openai_api_key = _read_key("openai_key.txt")

# SerpAPI key (used by the search wrapper).
serp_api_key = _read_key("serpapi_key.txt")

# Wolfram Alpha app id.
wolfram_appid = _read_key("wolfram_appid.txt")


In [2]:
# Two model handles share the same key: a chat model and a completion model.
# The completion model backs the calculator chain.
llm = ChatOpenAI(openai_api_key=openai_api_key, temperature=0)
llm1 = OpenAI(openai_api_key=openai_api_key, temperature=0)
search = SerpAPIWrapper(serpapi_api_key=serp_api_key)
llm_math_chain = LLMMathChain(verbose=True, llm=llm1)
# NOTE(review): the Wolfram Alpha wrapper is created here but is not one of the
# tools listed below -- confirm whether that is intentional.
wolfram = WolframAlphaAPIWrapper(wolfram_alpha_appid=wolfram_appid)

# Each (name, callable, description) triple becomes one Tool in the list.
tools = [
    Tool(name=tool_name, func=tool_func, description=tool_description)
    for tool_name, tool_func, tool_description in (
        (
            "Search",
            search.run,
            "useful for when you need to answer questions about current events. You should ask targeted questions",
        ),
        (
            "Calculator",
            llm_math_chain.run,
            "useful for when you need to answer simple questions about math",
        ),
    )
]

In [3]:
# Build the agent from the tool list and the chat model; verbose=True is passed through.
mrkl = initialize_agent(
    tools=tools,
    llm=llm,
    agent="zero-shot-react-description",
    verbose=True,
)

In [4]:
# test to see if model is working
# mrkl.run("""Solve 312*s + 276*s - 661*s + 952 = -362 for s.""")





In [5]:
# Load the benchmark questions from
# C:\Users\cayde\Desktop\Grad_School_stuff\data-534_expo_proj\Synergistic_Computing_hf\benchmarking\deepmind_math_q.txt
with open("C:\\Users\\cayde\\Desktop\\Grad_School_stuff\\data-534_expo_proj\\Synergistic_Computing_hf\\benchmarking\\deepmind_math_q.txt", "r") as f:
    raw_test = f.read()

# File format: questions and answers alternate, one per line, e.g.
#   Which is the nearest to 6?  (a) -462/107  (b) 0.3  (c) 6/19
#   c
# The first line of each pair is the question, the second is its answer.

# Drop blank lines (such as the empty entry a trailing newline would produce) so
# they cannot shift the question/answer pairing, and trim surrounding whitespace.
qa_lines = [line.strip() for line in raw_test.splitlines() if line.strip()]

# An odd count means some question has no answer line; fail with a clear message
# instead of an IndexError while pairing.
if len(qa_lines) % 2 != 0:
    raise ValueError(
        f"Expected alternating question/answer lines but found {len(qa_lines)} non-blank lines"
    )

# Final structure: a list of (question, answer) tuples, for example
# [("Which is the nearest to 6?  (a) -462/107  (b) 0.3  (c) 6/19", "c"), ...]
test = list(zip(qa_lines[0::2], qa_lines[1::2]))

# For a quick smoke run, uncomment the next line to keep only the first two questions.
# test = test[:2]

In [6]:
# Benchmark the MRKL agent on a set of math questions from deepmind
# (https://github.com/deepmind/mathematics_dataset) and log every exchange to a file.
import contextlib
import sys
import re

# Total number of questions and running count of answers judged correct.
num_questions = len(test)
num_correct = 0

# Everything printed inside the redirect block goes to the log file. On exit the
# previous stdout is restored and the file is closed, even if the loop raises.
with open('C:\\Users\\cayde\\Desktop\\Grad_School_stuff\\data-534_expo_proj\\Synergistic_Computing_hf\\benchmarking\\MRKL_chat_test.txt', 'w', encoding='utf-8') as log_file:
    with contextlib.redirect_stdout(log_file):
        for i, (question, answer) in enumerate(test, start=1):
            print(f'=============================Q{i}==============================================================\n')
            # Record the question the agent is about to be asked.
            print(f"Question: {question} \n")
            print('---------------------------------------------------------------\n')
            # One failing question must not abort the whole run: log the failure
            # together with the error that caused it, then move on.
            try:
                result = mrkl.run(question)
            except Exception as e:
                print(f"Agent's Answer: Agent Failed to Answer \n")
                print(f"Error: {e!r} \n")
                print(f'==============================================================================================\n')
                continue

            print('---------------------------------------------------------------\n')
            # Record what the agent answered.
            print(f"Agent's Answer: {result} \n")
            print('---------------------------------------------------------------\n')
            # Record the expected answer from the benchmark file.
            print(f"Actual Answer: {answer} \n")
            # The answer counts as correct when the expected text appears in the result.
            # re.escape makes the match literal, so characters such as * + ( ) in an
            # answer are not interpreted as regex syntax.
            if re.search(re.escape(answer), result):
                num_correct += 1
                print(f"Agent's Answer: Correct \n")
            print(f'==============================================================================================\n')

        # Summary; an empty question set reports 0.0 instead of dividing by zero.
        accuracy = num_correct / num_questions if num_questions else 0.0
        print(f"Number of Questions: {num_questions} \n")
        print(f"Number of Correct Answers: {num_correct} \n")
        print(f"Accuracy: {accuracy} \n")


In [6]:
# Set up the plain OpenAI davinci model as a baseline to compare against.
# The openai module is given the same key that was read from file into openai_api_key.
import openai
openai.api_key = openai_api_key

def query_davinci_old_version(prompt, model_version="text-davinci-003", max_tokens=40, temperature=0.15):
    """Send ``prompt`` to the completion endpoint and return the first completion's text.

    Args:
        prompt: Text sent to the model as-is.
        model_version: Value passed as ``engine`` to ``openai.Completion.create``.
        max_tokens: Value passed as ``max_tokens``; defaults to 40.
        temperature: Value passed as ``temperature``; defaults to 0.15.

    Returns:
        The ``text`` of the first entry in ``response.choices`` with leading and
        trailing whitespace removed.
    """
    response = openai.Completion.create(
        engine=model_version,
        prompt=prompt,
        max_tokens=max_tokens,
        n=1,
        stop=None,
        temperature=temperature,
    )

    # Only one completion is requested (n=1), so the first choice is the answer.
    return response.choices[0].text.strip()

# Smoke test: send a single benchmark-style question to the completion model and show the reply.
prompt = "Which is the nearest to 6?  (a) -462/107  (b) 0.3  (c) 6/19"
response_text = query_davinci_old_version(prompt=prompt)
print(response_text)

(b) 0.3


In [9]:
import re

# Benchmark the plain OpenAI completion model on the same questions and write
# one record per question to the results file.
num_correct = 0
num_questions = len(test)

# Opening in "w" mode starts from an empty results file. The file is flushed
# after every question so partial results are on disk if the run is interrupted.
with open("C:\\Users\\cayde\\Desktop\\Grad_School_stuff\\data-534_expo_proj\\Synergistic_Computing_hf\\benchmarking\\deepmind_math_q_results.txt", "w", encoding='utf-8') as f:
    for j, (question, answer) in enumerate(test, start=1):
        # One failing request must not abort the run: keep the error and record
        # the question as unanswered.
        error = None
        try:
            result = query_davinci_old_version(question)
        except Exception as e:
            result = "Agent Failed to Answer"
            error = e

        f.write(f'=============================Q{j}==============================================================\n')
        # Record the question that was asked.
        f.write(f"Question: {question} \n")
        f.write('---------------------------------------------------------------\n')
        # Record what the model answered (or the failure placeholder).
        f.write(f"Agent's Answer:\n {result} \n")
        if error is not None:
            f.write(f"Error: {error!r} \n")
        f.write('---------------------------------------------------------------\n')
        # Record the expected answer from the benchmark file.
        f.write(f"Actual Answer: {answer} \n")
        # Failed requests are never scored. Otherwise the answer counts as correct
        # when the expected text appears in the result; re.escape makes the match
        # literal, so characters such as * + ( ) are not read as regex syntax.
        if error is None and re.search(re.escape(answer), result):
            num_correct += 1
            f.write(f"Agent's Answer: Correct \n")
        f.write(f'==============================================================================================\n')
        f.flush()

    # Summary; an empty question set reports 0.0 instead of dividing by zero.
    accuracy = num_correct / num_questions if num_questions else 0.0
    f.write(f"Number of Questions: {num_questions} \n")
    f.write(f"Number of Correct Answers: {num_correct} \n")
    f.write(f"Accuracy: {accuracy} \n")

# Show the same summary in the notebook output.
print(f"Number of Questions: {num_questions} \n")
print(f"Number of Correct Answers: {num_correct} \n")
print(f"Accuracy: {accuracy} \n")

Number of Questions: 105 

Number of Correct Answers: 37 

Accuracy: 0.3523809523809524 

