In [None]:
import os

# Provide the API keys the rest of the notebook needs.
# setdefault() only fills in a value when the variable is not already present,
# so a real key exported in the shell before launching the notebook is never
# overwritten by the placeholder text. Replace the placeholders with your own
# keys, or (preferably) export the variables outside the notebook.
os.environ.setdefault("LANGCHAIN_API_KEY", "LANGCHAIN_API_KEY_HERE")
os.environ.setdefault("OPENAI_API_KEY", "OPENAI_API_KEY_HERE")

In [None]:
from langchain_benchmarks import registry

In [None]:
# Look up the task registered under the name "Multiverse Math".
task = registry["Multiverse Math"]
# Bare trailing expression so the notebook renders the task summary as output.
task

0,1
Name,Multiverse Math
Type,ToolUsageTask
Dataset ID,47ed57bc-e852-4f84-a23e-cce4793864e9
Description,"An environment that contains a few basic math operations, but with altered results. For example, multiplication of 5*3 will be re-interpreted as 5*3*1.1. The basic operations retain some basic properties, such as commutativity, associativity, and distributivity; however, the results are different than expected. The objective of this task is to evaluate the ability to use the provided tools to solve simple math questions and ignore any innate knowledge about math. This task is associated with 20 test examples."


In [None]:
# Create the environment for this task; its `tools` attribute holds the tools.
env = task.create_environment()
# Display only the first five tools to keep the output short.
env.tools[:5]

[StructuredTool(name='multiply', description='multiply(a: float, b: float) -> float - Multiply two numbers; a * b.', args_schema=<class 'pydantic.v1.main.multiplySchema'>, func=<function multiply at 0x7656730f68c0>),
 StructuredTool(name='add', description='add(a: float, b: float) -> float - Add two numbers; a + b.', args_schema=<class 'pydantic.v1.main.addSchema'>, func=<function add at 0x765671ba5a20>),
 StructuredTool(name='divide', description='divide(a: float, b: float) -> float - Divide two numbers; a / b.', args_schema=<class 'pydantic.v1.main.divideSchema'>, func=<function divide at 0x765671ba5990>),
 StructuredTool(name='subtract', description='subtract(a: float, b: float) -> float - Subtract two numbers; a - b.', args_schema=<class 'pydantic.v1.main.subtractSchema'>, func=<function subtract at 0x765671ba5bd0>),
 StructuredTool(name='power', description='power(a: float, b: float) -> float - Raise a number to a power; a ** b.', args_schema=<class 'pydantic.v1.main.powerSchema'>

In [None]:
# Call the first tool directly (the tool listing shows it is `multiply`).
# The result is deliberately not ordinary arithmetic: 2 * 4 comes back as 8.8.
env.tools[0].invoke({"a": 2, "b": 4})

8.8

In [None]:
# Show the instruction text that accompanies this task.
task.instructions

'You are requested to solve math questions in an alternate mathematical universe. The operations have been altered to yield different results than expected. Do not guess the answer or rely on your  innate knowledge of math. Use the provided tools to answer the question. While associativity and commutativity apply, distributivity does not. Answer the question using the fewest possible tools. Only include the numeric response without any clarifications.'

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai.chat_models import ChatOpenAI

from langchain_benchmarks.tool_usage.agents import StandardAgentFactory

# Chat model that drives the agent; temperature=0 keeps sampling randomness low.
model = ChatOpenAI(temperature=0)

# Prompt layout, one (role, template) pair per message:
#   system      -> filled in automatically from task.instructions
#   human       -> the question, taken from the test data
#   placeholder -> scratch space where the agent records its work
#                  (e.g., calling multiple tools)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "{instructions}"),
        ("human", "{question}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)

# Bundle the task, model and prompt into a factory object.
agent_factory = StandardAgentFactory(task, model, prompt)

In [None]:
# NOTE(review): this binds the name `globals` to langchain's `globals`, which
# shadows Python's built-in globals() in every cell that runs afterwards.
# Consider `from langchain.globals import set_verbose` instead — first confirm
# that no later cell relies on the `globals` name.
from langchain import globals

# Turn on verbose mode so the agent run below prints its chain trace.
globals.set_verbose(True)

# Create an agent from the factory and ask it a single question.
agent = agent_factory()
agent.invoke({"question": "how much is 2+5"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `add` with `{'a': 2, 'b': 5}`


[0m[33;1m[1;3m8.2[0m[32;1m[1;3m8.2[0m

[1m> Finished chain.[0m


{'question': 'how much is 2+5',
 'output': '8.2',
 'intermediate_steps': [(ToolAgentAction(tool='add', tool_input={'a': 2, 'b': 5}, log="\nInvoking: `add` with `{'a': 2, 'b': 5}`\n\n\n", message_log=[AIMessageChunk(content='', additional_kwargs={'tool_calls': [{'index': 0, 'id': 'call_x0Ci4GY8rcrqJhAzcMxdS8TJ', 'function': {'arguments': '{"a":2,"b":5}', 'name': 'add'}, 'type': 'function'}]}, response_metadata={'finish_reason': 'tool_calls'}, id='run-f4305e1f-6ca5-41e4-a695-a55a8ff422e6', tool_calls=[{'name': 'add', 'args': {'a': 2, 'b': 5}, 'id': 'call_x0Ci4GY8rcrqJhAzcMxdS8TJ'}], tool_call_chunks=[{'name': 'add', 'args': '{"a":2,"b":5}', 'id': 'call_x0Ci4GY8rcrqJhAzcMxdS8TJ', 'index': 0}])], tool_call_id='call_x0Ci4GY8rcrqJhAzcMxdS8TJ'),
   8.2)]}