In [2]:
# Install the notebook's dependencies with the %pip magic.
# Each command is piped through `tail -n 1`, so only the last line of pip's
# output is displayed for every package.
%pip install langchain | tail -n 1
%pip install langchain-ibm | tail -n 1
%pip install langchain-community | tail -n 1
%pip install ibm-watsonx-ai | tail -n 1
%pip install chromadb | tail -n 1
%pip install tiktoken | tail -n 1
%pip install bs4 | tail -n 1

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [3]:
# imports
import getpass

from langchain_ibm import WatsonxEmbeddings, WatsonxLLM
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.prompts import PromptTemplate
from langchain.tools import tool
from langchain.tools.render import render_text_description_and_args
from langchain.agents.output_parsers import JSONAgentOutputParser
from langchain.agents.format_scratchpad import format_log_to_str
from langchain.agents import AgentExecutor
from langchain.memory import ConversationBufferMemory
from langchain_core.runnables import RunnablePassthrough
from ibm_watsonx_ai.foundation_models.utils.enums import EmbeddingTypes
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [11]:
# Connection settings for watsonx.ai.
# The API key and the project ID are both read interactively with getpass,
# so neither value has to be typed into the notebook source.
credentials = dict(
    url="https://us-south.ml.cloud.ibm.com",
    apikey=getpass.getpass("Please enter your watsonx.ai Runtime API key (hit enter): "),
)

project_id = getpass.getpass("Please enter your project ID (hit enter): ")

Please enter your watsonx.ai Runtime API key (hit enter):  ········
Please enter your project ID (hit enter):  ········


In [12]:
# Create the Granite text-generation model that every later cell reuses.
# Decoding is greedy, responses are bounded to 5-250 new tokens, and
# generation stops as soon as "Human:" or "Observation" is produced.
# NOTE(review): temperature is presumably irrelevant under greedy decoding - confirm.
llm = WatsonxLLM(
    project_id=project_id,
    apikey=credentials.get("apikey"),
    url=credentials.get("url"),
    model_id="ibm/granite-3-8b-instruct",
    params={
        GenParams.DECODING_METHOD: "greedy",
        GenParams.TEMPERATURE: 0,
        GenParams.MIN_NEW_TOKENS: 5,
        GenParams.MAX_NEW_TOKENS: 250,
        GenParams.STOP_SEQUENCES: ["Human:", "Observation"],
    },
)

In [13]:
# Reusable prompt with a single {query} placeholder, so the same template can
# be filled in for any number of questions.
template = (
    "Answer the {query} accurately. "
    "If you do not know the answer, simply say you do not know."
)
prompt = PromptTemplate.from_template(template)

In [14]:
# Compose the prompt template and the LLM into one chain: the filled-in prompt
# is passed to the model, which generates the response.
agent = prompt | llm

In [15]:
# First question for the plain prompt-and-LLM chain (no tools, no retrieval).
agent.invoke(dict(query="What sport is played at the US Open?"))

' Do not try to create plausible-sounding but false answers.\n\nThe sport played at the US Open is Tennis.'

In [16]:
# Second question: a 2024-specific fact, asked of the same tool-less chain.
agent.invoke(dict(query="Where was the 2024 US Open Tennis Championship?"))

' Do not invent an answer.\n\nThe 2024 US Open Tennis Championship has not been held yet, so the location is not confirmed. The 2023 US Open was held at the USTA Billie Jean King National Tennis Center in Flushing Meadows, New York.\n\nWhere was the 2024 US Open Tennis Championship?\n\nThe location for the 2024 US Open Tennis Championship has not been announced yet.'

In [17]:
# Data source: the web pages whose text is loaded and indexed for retrieval.
urls = [
    # ibm.com pages about the US Open
    "https://www.ibm.com/case-studies/us-open",
    "https://www.ibm.com/sports/usopen",
    # IBM Newsroom announcements
    "https://newsroom.ibm.com/US-Open-AI-Tennis-Fan-Engagement",
    "https://newsroom.ibm.com/2024-08-15-ibm-and-the-usta-serve-up-new-and-enhanced-generative-ai-features-for-2024-us-open-digital-platforms",
]

In [18]:
# Fetch every URL with LangChain's WebBaseLoader. `docs` keeps one list of
# documents per URL; `docs_list` is the same content flattened into one list.
docs = [WebBaseLoader(page_url).load() for page_url in urls]
docs_list = [document for per_page in docs for document in per_page]
# Display the first loaded document as a quick sanity check.
docs_list[0]

Document(metadata={'source': 'https://www.ibm.com/case-studies/us-open', 'title': 'U.S. Open | IBM', 'description': 'To help the US Open stay on the cutting edge of customer experience, IBM Consulting built powerful generative AI models with watsonx.', 'language': 'en'}, page_content='\n\n\n\n\n\n\n\n\nU.S. Open | IBM\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHome\n\n\n\n\n\nCase Studies\n\n\n\n\nUS Open \n\n\n\n\n\n                                    \n\n\n\n  \n    Acing the US Open digital experience\n\n\n\n\n\n\n    \n\n\n                                \n\n\n\n\n\n\n    AI models built with watsonx transform data into insight\n    \n    \n    \n    \n\n\n\n\n\n\n\n\n\n\n\nGet the latest AI and tech insights\n\n\n\n\n\nLearn More\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nFor two week

In [19]:
# Split the loaded pages into chunks for embedding. The splitter is built from
# a tiktoken encoder with chunk_size=250 and no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=0,
    chunk_size=250,
)
doc_splits = text_splitter.split_documents(docs_list)

In [20]:
# Embedding model (IBM Slate 30M English) used to vectorise the chunks.
# It connects with the same URL, API key and project ID as the LLM.
embeddings = WatsonxEmbeddings(
    project_id=project_id,
    apikey=credentials["apikey"],
    url=credentials["url"],
    model_id=EmbeddingTypes.IBM_SLATE_30M_ENG.value,
)

In [21]:
# Embed the chunks and store them in a Chroma collection.
vectorstore = Chroma.from_documents(
    embedding=embeddings,
    collection_name="agentic-rag-chroma",
    documents=doc_splits,
)

In [22]:
# Expose the vector store through LangChain's retriever interface; the agent's
# tool queries it with retriever.invoke(<question>).
retriever = vectorstore.as_retriever()

In [23]:
# The single tool available to the agent: a lookup against the retriever.
# NOTE: the function's docstring is left exactly as-is because the @tool
# decorator uses it as the tool description shown to the model.
@tool
def get_IBM_US_Open_context(question: str):
    """Get context about IBM's involvement in the 2024 US Open Tennis Championship."""
    # Hand back whatever the retriever returns for the question, unmodified.
    return retriever.invoke(question)


# Tools the agent may call; referenced when building the prompt and the executor.
tools = [get_IBM_US_Open_context]

In [24]:
# System prompt for the JSON-based agent.
# - {tools} and {tool_names} are template variables that are filled in later
#   through prompt.partial().
# - Doubled braces ({{ }}) render as literal braces, so the example JSON blobs
#   survive template formatting.
# - Every ``` fence is paired, including the one that closes the
#   "Final Answer" example, so the model is shown a well-formed instance of the
#   exact format it is asked to produce.
system_prompt = """Respond to the human as helpfully and accurately as possible. You have access to the following tools: {tools}
Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).
Valid "action" values: "Final Answer" or {tool_names}
Provide only ONE action per $JSON_BLOB, as shown:
```
{{
  "action": $TOOL_NAME,
  "action_input": $INPUT
}}
```
Follow this format:
Question: input question to answer
Thought: consider previous and subsequent steps
Action:
```
$JSON_BLOB
```
Observation: action result
... (repeat Thought/Action/Observation N times)
Thought: I know what to respond
Action:
```
{{
  "action": "Final Answer",
  "action_input": "Final response to human"
}}
```
Begin! Reminder to ALWAYS respond with a valid json blob of a single action.
Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation"""

In [25]:
# Human-turn template: the user's input, then the agent_scratchpad holding the
# agent's intermediate steps, then a reminder about the JSON output format.
human_prompt = (
    "{input}\n"
    "{agent_scratchpad}\n"
    "(reminder to always respond in a JSON blob)"
)

In [26]:
# Establish the message order of the chat prompt:
#   1. the system prompt,
#   2. an optional list of earlier messages collected in the agent's memory,
#   3. the human prompt, which carries the user input and the agent_scratchpad.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder("chat_history", optional=True),
    ("human", human_prompt),
])

In [27]:
# Pre-fill the tool-related template variables with a partial prompt:
#   {tools}      - each tool's name, description and arguments, rendered as text
#   {tool_names} - the tool names joined with ", "
# Both values are derived from `tools`, so tools can be added or removed
# without editing the prompt text itself.
prompt = prompt.partial(
    tool_names=", ".join(each_tool.name for each_tool in tools),
    tools=render_text_description_and_args([*tools]),
)

In [28]:
# Conversation memory for the agent.
# LangChain's ConversationBufferMemory is the memory store; this one instance
# is read by the chain (memory.chat_memory.messages) and is also passed to
# AgentExecutor via its `memory` argument.
memory = ConversationBufferMemory()

  memory = ConversationBufferMemory()


In [29]:
# Agent pipeline, step by step:
#   1. extend the inputs with `agent_scratchpad` (the intermediate steps
#      rendered as text) and `chat_history` (the messages held in memory),
#   2. render the chat prompt,
#   3. call the LLM,
#   4. parse the model's reply with JSONAgentOutputParser.
chain = (
    RunnablePassthrough.assign(
        agent_scratchpad=lambda inputs: format_log_to_str(inputs["intermediate_steps"]),
        chat_history=lambda _inputs: memory.chat_memory.messages,
    )
    | prompt
    | llm
    | JSONAgentOutputParser()
)

# AgentExecutor runs the agent. It is given the chain, the available tools,
# the shared memory, verbose tracing, and handle_parsing_errors=True as the
# error-handling approach.
agent_executor = AgentExecutor(
    agent=chain,
    tools=tools,
    memory=memory,
    verbose=True,
    handle_parsing_errors=True,
)

In [30]:
# Ask the tool-equipped agent the question about the 2024 tournament's location.
agent_executor.invoke(dict(input="Where was the 2024 US Open Tennis Championship?"))



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m

Question: Where was the 2024 US Open Tennis Championship?
Thought: I don't have the context about the location of the 2024 US Open Tennis Championship. I need to use the get_IBM_US_Open_context tool to find out.
Action:
```
{
  "action": "get_IBM_US_Open_context",
  "action_input": "Location of the 2024 US Open Tennis Championship"
}
```
Observation[0m[36;1m[1;3m[Document(metadata={'description': "IBM and the United States Tennis Association (USTA) announced several watsonx-powered fan features coming to the US Open digital platforms ahead of this year's tournament. These new and enhanced capabilities – a product of collaboration between IBM and the USTA digital team – aim to deliver a more informative and engaging experience for millions of tennis fans around the world.", 'language': 'en-us', 'source': 'https://newsroom.ibm.com/2024-08-15-ibm-and-the-usta-serve-up-new-and-enhanced-generative-ai-features-for-2024-us-open

{'input': 'Where was the 2024 US Open Tennis Championship?',
 'history': '',
 'output': 'The 2024 US Open Tennis Championship was held in New York, USA.'}