In [15]:
pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [6]:
from dotenv import load_dotenv

# Load settings from a .env file; the cell displays the call's return value.
# NOTE(review): presumably this supplies the API credentials used by the model
# clients created later in the notebook — confirm.
load_dotenv()

True

In [7]:
import pprint
from typing import Any, Dict

import pandas as pd
from langchain.output_parsers import PandasDataFrameOutputParser
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

In [8]:
# OpenAI chat model with temperature 0.
# NOTE: the Bedrock cell further down rebinds `model`, so this instance is only
# the one used by the chain if that later cell is skipped.
model = ChatOpenAI(temperature=0)

In [18]:
import boto3
from langchain.llms.bedrock import Bedrock
# Identifier of the Bedrock-hosted model to invoke.
modelId = "anthropic.claude-v2:1"

# Bedrock runtime client, pinned to the us-west-2 region.
bedrock_client = boto3.client(service_name="bedrock-runtime", region_name="us-west-2")

# Rebinds `model`: from here on the chain talks to Bedrock rather than to the
# ChatOpenAI instance created in an earlier cell.
model = Bedrock(
    client=bedrock_client,
    model_id=modelId,
    model_kwargs=dict(
        max_tokens_to_sample=4096,
        stop_sequences=[],
        temperature=0.5,  # Lower values make the response less random.
        top_p=1,  # Lower values drop the less probable options.
        top_k=250,  # How many token choices are considered for the next token.
    ),
)

In [19]:
# Solely for documentation purposes.
def format_parser_output(parser_output: Dict[str, Any]) -> None:
    """Pretty-print a parser result to standard output.

    Values that expose a ``to_dict`` method (e.g. pandas objects) are converted
    to plain dictionaries before printing; any other value, such as a scalar
    aggregate, is printed unchanged.

    The conversion is done on a copy, so ``parser_output`` itself is never
    modified and the function can be called repeatedly on the same object.

    Args:
        parser_output: Mapping from result name to the value to display.

    Returns:
        None. The formatted text is written to standard output.
    """
    printable = {
        key: value.to_dict() if hasattr(value, "to_dict") else value
        for key, value in parser_output.items()
    }
    # A very narrow width forces one entry per line, keeping the output compact
    # horizontally.
    pprint.PrettyPrinter(width=4, compact=True).pprint(printable)

In [20]:
# The DataFrame that queries will be answered from: one row per observed
# creature, three integer columns.
df = pd.DataFrame(
    data=dict(
        num_legs=[2, 4, 8, 0],
        num_wings=[2, 0, 0, 0],
        num_specimen_seen=[10, 2, 1, 8],
    )
)

# Set up an output parser bound to the DataFrame above. Its format instructions
# are injected into the prompt template in a later cell, not here.
parser = PandasDataFrameOutputParser(dataframe=df)

In [21]:
# Example of a column operation: ask for a single column of the DataFrame.
df_query = "Retrieve the num_wings column."

# Build the prompt, pre-filling the parser's format instructions so that only
# the user query has to be supplied at invocation time.
prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template=(
        "Answer the user query.\n"
        "{format_instructions}\n"
        "{query}\n"
    ),
)

# Pipe prompt -> model -> parser, then run the pipeline on the query.
chain = prompt | model | parser
parser_output = chain.invoke(dict(query=df_query))

format_parser_output(parser_output)

{'num_wings': {0: 2,
               1: 0,
               2: 0,
               3: 0}}
