In [None]:
pip install boto3 langchain

In [2]:
import boto3
from langchain.llms import Bedrock

# Bedrock runtime client. No region or credentials are passed here, so boto3
# resolves them from its default configuration.
bedrock = boto3.client('bedrock-runtime')

# Inference parameters handed to the Bedrock wrapper as model_kwargs.
model_kwargs = dict(
    max_tokens_to_sample=8191,
    temperature=0.5,
    top_k=500,
    top_p=1,
)

# LangChain wrapper for the Claude v2 model; the rest of the notebook calls it as llm(prompt).
llm = Bedrock(
    client=bedrock,
    model_id='anthropic.claude-v2',
    model_kwargs=model_kwargs,
    verbose=False,
)

In [17]:
# Tool descriptions shown to the model when it is asked to pick a tool; one string per tool.
tools = [
    """Name: dataframe_tool(). Description: Use this tool when you need to answer questions about a given dataset""",
    """Name: none_tool(). Description: Choose this tool when no tool is needed""",
]

In [6]:
import pandas as pd

# Read the housing sample and keep its first five rows as a small working frame.
df = pd.read_csv('/content/sample_data/california_housing_test.csv')
test_df = df.head(5)
test_df

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-122.05,37.37,27.0,3885.0,661.0,1537.0,606.0,6.6085,344700.0
1,-118.3,34.26,43.0,1510.0,310.0,809.0,277.0,3.599,176500.0
2,-117.81,33.78,27.0,3589.0,507.0,1484.0,495.0,5.7934,270500.0
3,-118.36,33.82,28.0,67.0,15.0,49.0,11.0,6.1359,330000.0
4,-119.67,36.33,19.0,1241.0,244.0,850.0,237.0,2.9375,81700.0


In [21]:
def task_planner(query):
  """Ask the model which tool should handle the user's query.

  The prompt embeds the query and the module-level ``tools`` list and asks
  the model to reply with the tool name only.

  Args:
    query: The user's question.

  Returns:
    The model's reply with surrounding whitespace removed. The reply is not
    validated here, so it may contain more than a bare tool name.
  """
  # Claude text-completion prompts open with "\n\nHuman:" and close with
  # "\n\nAssistant:"; the instructions themselves are unchanged.
  prompt = (
    f"\n\nHuman: Answering with only the tool name. Your mission in life is to decide which tool to use "
    f"based on a user query {query} and the list of available tools {tools}. Return only the tool name."
    f"\n\nAssistant:"
  )
  tool_prediction = llm(prompt)
  return tool_prediction.strip()

In [48]:
import re
def tool_parser(tool_prediction, query):
  """Extract the tool name from the planner's reply and ask the model for the tool input.

  The first ``name()`` token found in the reply is taken as the tool name, so
  a reply such as "i want to use the calculator()" becomes "calculator()".
  The model is then asked to write Python code over ``test_df`` that answers
  the query.

  Args:
    tool_prediction: Raw reply from the planner.
    query: The user's question.

  Returns:
    A ``(tool_name, tool_input)`` tuple: the extracted tool name and the
    model's reply to the code-writing prompt (returned unmodified).

  Raises:
    Exception: If the reply contains no ``name()`` token.
  """
  # Search rather than anchor: the reply may wrap the name in extra words.
  found = re.search(r"\w+\(\)", tool_prediction)
  if found is None:
    print("doesn't match")
    raise Exception(f"Invalid tool prediction: {tool_prediction}")

  print("match")
  tool_name = found.group(0)
  # Claude text-completion prompts open with "\n\nHuman:" and close with "\n\nAssistant:".
  prompt = f"\n\nHuman: Your mission is to use the {tool_name} and your knowledge to write python code that will analyze the data {test_df} to answer the question {query}. Return only the necessary code to run like so: df.describe(). Use as few lines of code as possible. Do not provide steps or explanations.\n\nAssistant:"
  tool_input = llm(prompt)
  return tool_name, tool_input

In [44]:
def tool_dispatch(tool_prediction, tool_input):
  """Run the tool named by ``tool_prediction`` on ``tool_input`` and return its raw result.

  Dispatch is a plain if/elif chain over the tool name.

  Args:
    tool_prediction: Tool name such as "dataframe_tool()"; surrounding
      whitespace is ignored.
    tool_input: Text to hand to the tool. The calculator converts it with
      ``int``; the dataframe tool evaluates it as a Python expression in
      which ``df`` refers to ``test_df``.

  Returns:
    The selected tool's result, 'Epic Fail' for "none_tool()", or an
    "Unknown tool: ..." message when the name is not recognised (previously
    this case fell through and returned None without explanation).
  """
  def calculator(x):
    return x*10

  def dataframe(x):
    # SECURITY: eval() runs model-generated code with full interpreter access.
    # Acceptable for a local demo only; do not feed it untrusted prompts.
    df = test_df  # looks unused, but the evaluated expression refers to the frame as `df`
    agent_code = x.strip()
    return eval(agent_code)

  tool_name = tool_prediction.strip()
  if tool_name == "calculator_tool()":
    return calculator(int(tool_input))
  elif tool_name == "none_tool()":
    return 'Epic Fail'
  elif tool_name == "dataframe_tool()":
    return dataframe(tool_input)
  # Make the failure visible instead of silently returning None.
  return f"Unknown tool: {tool_name}"

In [60]:
def output_parser(raw_tool_output):
  """Convert a tool's raw result into text.

  DataFrames and Series (including subclasses) are rendered with
  ``to_string()`` so that no rows are elided; anything else goes through
  ``str()``.

  Args:
    raw_tool_output: Whatever the tool returned.

  Returns:
    A ``(status, text)`` tuple. The status is always 200.
  """
  # isinstance also accepts subclasses, which an exact type() comparison would miss.
  if isinstance(raw_tool_output, (pd.DataFrame, pd.Series)):
    text = raw_tool_output.to_string()
  else:
    text = str(raw_tool_output)
  return 200, text

In [64]:
def output_interpreter(user_input, tool_output):
  """Ask the model whether the tool output answers the question and, if so, phrase the answer.

  Args:
    user_input: The user's question.
    tool_output: Text produced by the tool.

  Returns:
    ``(True, answer)`` when the model judges the output sufficient, where
    ``answer`` is a second model reply that uses the tool output to answer
    the question; otherwise ``(False, tool_output)``.
  """
  # Claude text-completion prompts open with "\n\nHuman:" and close with "\n\nAssistant:".
  prompt = f"\n\nHuman: Your mission is to use \n Question:{user_input} \nand \n Answer:{tool_output} to determine if the question can be answered using the provided answer. If so, return only the word, True. If not, return False.\n\nAssistant:"
  classification = llm(prompt).strip().replace(" ", "")
  print(classification)
  # Accept "true", "True." and similar variants; an exact match on 'True'
  # would treat those replies as a failure.
  if classification.lower().startswith('true'):
    return True, llm(f"\n\nHuman: Use the {tool_output} to answer the question {user_input}.\n\nAssistant:")
  return False, tool_output

In [65]:
MAX_LOOP_COUNT = 2 # stop the agent loop after up to 2 iterations
# The helpers used below (task_planner, tool_parser, tool_dispatch,
# output_parser, output_interpreter) are defined in earlier cells.
def agent_handler(query):
    """Run the plan -> parse -> dispatch -> interpret loop for one user query.

    Each iteration asks the planner for a tool, parses the reply, runs the
    tool and lets the model judge the result. The loop ends when the model
    accepts the result or after MAX_LOOP_COUNT iterations.

    Args:
        query: The user's question.

    Returns:
        A dict with 'statusCode' (always 200) and 'body'. The body holds the
        final answer, the last tool output, or the last error message.
    """
    from pprint import pprint  # imported once here rather than on every loop iteration

    user_input = query
    print(f"user input: {user_input}")

    final_generation = ""
    is_task_complete = False
    loop_count = 0

    # start of agent loop
    while not is_task_complete and loop_count < MAX_LOOP_COUNT:
        tool_prediction = task_planner(user_input)
        print(f"tool_prediction: {tool_prediction}")

        tool_name, tool_input, tool_output, error_msg = None, None, "", ""

        try:
            tool_name, tool_input = tool_parser(tool_prediction, user_input)
            print(f"tool name: {tool_name}")
            print(f"tool input: {tool_input}")
        except Exception as e:
            error_msg = str(e)
            print(f"tool parse error: {error_msg}")

        if tool_name is not None: # if a valid tool is selected and parsed
            try:
                raw_tool_output = tool_dispatch(tool_name, tool_input)
            except Exception as e:
                # The dataframe tool evaluates model-written code, which can raise
                # anything; report the failure instead of crashing the handler.
                final_generation = f"tool execution error: {e}"
                print(final_generation)
            else:
                tool_status, tool_output = output_parser(raw_tool_output)
                pprint(tool_output)
                print(f"tool status: {tool_status}")
                if tool_status == 200:
                    is_task_complete, final_generation = output_interpreter(user_input, tool_output)
                    print(is_task_complete)
                else:
                    final_generation = tool_output
        else: # if no valid tool was selected and parsed, either return the default msg or error msg
            final_generation = 'Epic Fail' if error_msg == "" else error_msg

        loop_count += 1

    return {
        'statusCode': 200,
        'body': final_generation
    }

In [62]:
# Preview the working frame that the dataframe tool evaluates generated code against.
test_df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
0,-122.05,37.37,27.0,3885.0,661.0,1537.0,606.0,6.6085,344700.0
1,-118.3,34.26,43.0,1510.0,310.0,809.0,277.0,3.599,176500.0
2,-117.81,33.78,27.0,3589.0,507.0,1484.0,495.0,5.7934,270500.0
3,-118.36,33.82,28.0,67.0,15.0,49.0,11.0,6.1359,330000.0
4,-119.67,36.33,19.0,1241.0,244.0,850.0,237.0,2.9375,81700.0


In [63]:
# Hand-written top-3-by-age lookup, rendered as plain text.
(
    test_df
    .sort_values('housing_median_age', ascending=False)
    .head(3)[['housing_median_age']]
    .to_string()
)

'   housing_median_age\n1                43.0\n3                28.0\n0                27.0'

In [61]:
# End-to-end run of the agent on a sample question.
agent_handler("What are the three oldest houses in the dataset")

user input: What are the three oldest houses in the dataset
tool_prediction: dataframe_tool()
match
tool name: dataframe_tool()
tool input:  df.sort_values('housing_median_age', ascending=False).head(3)[['housing_median_age']]
('   housing_median_age\n'
 '1                43.0\n'
 '3                28.0\n'
 '0                27.0')
tool status: 200


{'statusCode': 200, 'body': ''}