Let's set up a simple task on asking a large language model to do elementry level mathematics.

Here the user will provide expressions that can be solved using BODMAS.

In [228]:
import os
import re
import numexpr
from typing import List
from pydantic import BaseModel, Field, validator

from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain.output_parsers import OutputFixingParser

from langchain.callbacks import get_openai_callback

In [183]:
from dotenv import load_dotenv
load_dotenv("/Users/ayushthakur/integrations/llm-eval/apis.env")
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

## Utils

In [269]:
# Define your desired data structure.
class Result(BaseModel):
    result: str = Field(description="Computation result returned by the LLM")
        
parser = PydanticOutputParser(pydantic_object=Result)
fixing_parser = OutputFixingParser.from_llm(parser=parser, llm=ChatOpenAI())

In [303]:
def is_valid_expression(expression):
    # Remove all white spaces from the expression
    expression = re.sub(r'\s', '', expression)

    # Check if the expression contains invalid characters
    if not re.match(r'^[\d+\-*/().\[\]{}^√]+$', expression):
        return False

    # Validate brackets using a stack
    stack = []
    opening_brackets = {'(', '[', '{'}
    closing_brackets = {')', ']', '}'}
    bracket_pairs = {'(': ')', '[': ']', '{': '}'}

    for char in expression:
        if char in opening_brackets:
            stack.append(char)
        elif char in closing_brackets:
            if not stack or bracket_pairs[stack.pop()] != char:
                return False

    if stack:
        return False

    return True


def correct_expression(expr: str) -> str:
    expr = expr.replace(" ", "")
    expr = expr.replace("[", "(")
    expr = expr.replace("]", ")")
    expr = expr.replace("{", "(")
    expr = expr.replace("}", ")")
    expr = expr.replace("^", "**")
    
    return expr


def evaluate_expr(expr: str) -> str:
    local_dict = {"pi": math.pi, "e": math.e}

    if is_valid_expression(expr):
        expr = correct_expression(expr)
        output = str(
            numexpr.evaluate(
                expr.strip(),
                global_dict={},  # restrict access to globals
                local_dict=local_dict,  # add common mathematical functions
            )
        )
        return output
    else:
        return None

In [300]:
q = "2 * math.sqrt(3) + 4"

In [301]:
evaluate_expr(q)

In [302]:
eval(q)

7.464101615137754

In [304]:
prompt_template = """
The following is the mathematical expression provided by the user.
{question}

Find the answer using the BODMAS rule in the {format_instructions}:"""


PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["question"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)


def load_chain():
    llm = ChatOpenAI(temperature=0, model_name="gpt-4")
    chain = LLMChain(
        llm=llm,
        prompt=PROMPT
    )
    return chain


def get_result(question):
    chain = load_chain()

    with get_openai_callback() as cb:
        output = chain(
            question
        )

    return output, cb

In [None]:
from wandb.

In [287]:
chain = load_chain()
chain.run(question, )

'To find the answer using the BODMAS rule, we need to follow the order of operations: Brackets, Orders (exponents and roots), Division and Multiplication, and Addition and Subtraction.\n\nGiven expression: 25 - [20 - {10-(7 - 5-3)}]\n\nStep 1: Solve the innermost brackets first.\n7 - 5 - 3 = 2 - 3 = -1\n\nStep 2: Replace the innermost brackets with the result.\n25 - [20 - {10 - (-1)}]\n\nStep 3: Solve the next brackets.\n10 - (-1) = 10 + 1 = 11\n\nStep 4: Replace the brackets with the result.\n25 - [20 - 11]\n\nStep 5: Solve the remaining brackets.\n20 - 11 = 9\n\nStep 6: Replace the brackets with the result.\n25 - 9\n\nStep 7: Perform the subtraction.\n25 - 9 = 16\n\nThe final answer is 16. Now, let\'s format the output as a JSON instance that conforms to the given JSON schema.\n\n{"result": "16"}'

In [272]:
question = "25 - [20 - {10-(7 - 5-3)}]"
output, cb = get_result(question)
print(output["text"], sep="\n")

To find the answer using the BODMAS rule, we need to follow the order of operations: Brackets, Orders (exponents and roots), Division and Multiplication, and Addition and Subtraction.

Given expression: 25 - [20 - {10-(7 - 5-3)}]

Step 1: Solve the innermost brackets first (7 - 5 - 3)
7 - 5 - 3 = 2 - 3 = -1

Step 2: Replace the innermost brackets with the result
25 - [20 - {10 - (-1)}]

Step 3: Solve the next brackets (10 - (-1))
10 - (-1) = 10 + 1 = 11

Step 4: Replace the brackets with the result
25 - [20 - 11]

Step 5: Solve the remaining brackets (20 - 11)
20 - 11 = 9

Step 6: Replace the brackets with the result
25 - 9

Step 7: Perform the subtraction
25 - 9 = 16

The final answer is 16. Now, let's format the output as a JSON instance that conforms to the given JSON schema.

{"result": "16"}


In [273]:
cb

Tokens Used: 460
	Prompt Tokens: 204
	Completion Tokens: 256
Successful Requests: 1
Total Cost (USD): $0.02148

In [278]:
with get_openai_callback() as cb:
    pred_result = fixing_parser.parse(output["text"])
pred_result.result

'16'

In [284]:
cb

Tokens Used: 785
	Prompt Tokens: 779
	Completion Tokens: 6
Successful Requests: 1
Total Cost (USD): $0.0015700000000000002

In [277]:
true_result = evaluate_expr(question)
true_result

'16'

In [285]:
dir(cb)

['__annotations__',
 '__class__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'always_verbose',
 'completion_tokens',
 'ignore_agent',
 'ignore_chain',
 'ignore_chat_model',
 'ignore_llm',
 'on_agent_action',
 'on_agent_finish',
 'on_chain_end',
 'on_chain_error',
 'on_chain_start',
 'on_chat_model_start',
 'on_llm_end',
 'on_llm_error',
 'on_llm_new_token',
 'on_llm_start',
 'on_text',
 'on_tool_end',
 'on_tool_error',
 'on_tool_start',
 'prompt_tokens',
 'raise_error',
 'successful_requests',
 'total_cost',
 'total_tokens']

In [None]:
prompt_template = """
You are an expert mathematician. You can solve a given mathematical expression using the BODMAS rule.
BODMAS stands for Bracket, Orders of Indices, Division, Multiplication, Addition and Subtraction. The computation should happen in that order.
The dorder is as follows:
B: Solve expressions inside brackets in this order -> small bracket followed by curly bracket and finally square bracket.
O: Solve the indices such as roots, powers, etc.
D: Divide the numbers which are given
M: Multiply the numbers next
A: Sum up the next numbers
S: Subtract the numbers left in the end

The following is the mathematical expression provided by the user.
{question}

Think about it step-by-step. Don't skip steps.

When ready with the answer return in the {format_instructions}:"""

In [280]:
2

2

In [281]:
names = [
    "gpt-4",
    "gpt-3.5-turbo",
    "text-davinci-003",
]

In [282]:
for name in names:
    llm = ChatOpenAI(temperature=0, model_name=name)

In [283]:
llm

ChatOpenAI(verbose=False, callbacks=None, callback_manager=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='text-davinci-003', temperature=0.0, model_kwargs={}, openai_api_key='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', openai_api_base='', openai_organization='', openai_proxy='', request_timeout=None, max_retries=6, streaming=False, n=1, max_tokens=None)