# RAG with Agents
adopted from: https://python.langchain.com/docs/use_cases/question_answering/conversational_retrieval_agents

In [2]:
import os
import openai
import bs4
from langchain import hub
from langchain_community.chat_models import  AzureChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain_community.embeddings import AzureOpenAIEmbeddings
from langchain.schema import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
# from langchain_core.runnables import RunnablePassthrough
from langchain.schema.runnable  import RunnablePassthrough
from ssl_workaround import no_ssl_verification
from dotenv import load_dotenv, find_dotenv

# 1. Init

In [3]:
def init():
    _ = load_dotenv(find_dotenv()) # read local .env file
    openai.api_key = os.environ['OPENAI_API_KEY']
    openai.api_base = os.environ['OPENAI_API_BASE']
    openai.api_type= os.environ['OPENAI_API_TYPE']
    openai.api_version = os.environ['OPENAI_API_VERSION']
    print(f'Openai secrets loaded, models: {os.environ["OPENAI_DEPLOYMENT_ID_LLM"]}, {os.environ["OPENAI_DEPLOYMENT_ID_EMBED"]}')
init()

Openai secrets loaded, models: gpt-35-turbo, text-embedding-ada-002


# 2. Load from Web

In [4]:
# loads URL content into a document - one document per URL
# only extracts the content of the post-content, post-title, and post-header classes
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
docs = loader.load()

In [5]:
# test doc loading
print(f'Nuber of docs loaded: {len(docs)}')
print(f'First doc lenght: {len(docs[0].page_content)}')
print(f'Sample: {docs[0].page_content[:500]}')

Nuber of docs loaded: 1
First doc lenght: 42824
Sample: 

      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In


# 3. Split
- LLM struggle to find info in very long context, we should split it in reasonable long documents
- its good to have overlap between spit documents in order not to loose context from ending / begginging chunk parts

In [6]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

In [7]:
# test splitter
print(f'Number of documents created: {len(all_splits)}')
print(f'First document length: {len(all_splits[0].page_content)}')
print(f'Metadata sample: {all_splits[0].metadata}')

Number of documents created: 66
First document length: 969
Metadata sample: {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 8}


# 4. Vector Store

In [8]:
persist_directory = './data/test_vectorstore_llm_azureemb/'

In [9]:
embedding=AzureOpenAIEmbeddings(azure_deployment=os.environ['OPENAI_DEPLOYMENT_ID_EMBED'],max_retries=10, show_progress_bar=True)

  warn_deprecated(


In [10]:
# # OPTION 1: create vectorstore from scratch - needed first time
# vectorstore = Chroma.from_documents(
#                                 documents=all_splits, 
#                                 embedding=embedding, 
#                                 persist_directory=persist_directory)

In [11]:
# OPTION 2: load vectorstore from disk - faster
vectorstore = Chroma(
            persist_directory=persist_directory, 
            embedding_function=embedding)

# Retriever tool

In [12]:
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [13]:
# retriever test
retrieved_docs = retriever.get_relevant_documents(
    "What are the approaches to Task Decomposition?"
)
print(f'---Number of documents retrieved: {len(retrieved_docs)}')
print(f'---Doc 0 length: {len(retrieved_docs[0].page_content)}')
print(f'---Doc 0: {retrieved_docs[0].page_content}')
print(f'---Doc 1 length: {len(retrieved_docs[1].page_content)}')
print(f'---Doc 1: {retrieved_docs[1].page_content}')

  0%|          | 0/1 [00:00<?, ?it/s]

---Number of documents retrieved: 3
---Doc 0 length: 644
---Doc 0: Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.
Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.
---Doc 1 length: 606
---Doc 1: Fig. 1. Overview of a LLM-powered autonomous agent system.
Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a

# Create agent
adopted from: https://github.com/pinecone-io/examples/blob/master/learn/generation/langchain/handbook/08-langchain-retrieval-agent.ipynb

In [35]:
llm = AzureChatOpenAI(azure_deployment=os.environ['OPENAI_DEPLOYMENT_ID_LLM'],max_retries=10)

In [36]:
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.chains import RetrievalQA

# conversational memory
conversational_memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    k=5,
    return_messages=True
)
# retrieval qa chain
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever
)

In [37]:
qa.run("How can the reflextion framework be used in LLM apps?")

  0%|          | 0/1 [00:00<?, ?it/s]

"The reflection framework can be used in LLM apps to enable self-reflection, which allows the autonomous agent to improve its actions and decisions by refining past actions and correcting previous mistakes. The heuristic function in the reflection framework determines when the trajectory is inefficient or contains hallucination and should be stopped. Inefficient planning refers to trajectories that take too long without success, while hallucination is defined as encountering a sequence of consecutive identical actions that lead to the same observation in the environment. By showing two-shot examples to LLM and adding reflections into the agent's working memory, up to three, LLM can use them as context for querying and improving its actions and decisions in the future."

## adding retriever (KB)

In [38]:
from langchain.agents import Tool

tools = [
    Tool(
        name='Knowledge Base',
        func=qa.run,
        description=(
            'use this tool when answering general knowledge queries to get '
            'more information about LLM apps'
        )
    )
]

In [39]:
from langchain.agents import initialize_agent

agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=conversational_memory
)

In [40]:
sys_msg = """Assistant is a large language model.

Assistant is designed to be able to assist with a wide range of tasks related to LLMs, machine learning and programming , \
from answering simple questions to providing in-depth explanations, example code and links to documentation. \
As a language model, Assistant is able to generate human-like text based on the input it receives, \
allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.
Assistant can use code spippets and URL links.

Assistant is constantly learning and improving, and its capabilities are constantly evolving. \
It is able to process and understand large amounts of text, and can use this knowledge to provide accurate \
and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input \
it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics related to LLMs, machine learning and programming.

If the question is related to LLMs, machine learning and programming Assistant has to first use the Knowledge Base. \

If the question is not related to LLMs, machine learning and programming, Assistant has to respond: \
I'm sorry this is not my area of expertise.

"""
# If no relevant answer is found in the Knowlege Base, Assistant can use Web Search tool to find an answer on the internet. \

In [41]:
new_prompt = agent.agent.create_prompt(
    system_message=sys_msg,
    tools=tools
)

agent.agent.llm_chain.prompt = new_prompt

test

In [42]:
agent.run("How can Tree of Thoughts be used in LLM apps?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m{
    "action": "Knowledge Base",
    "action_input": "Tree of Thoughts in LLM"
}[0m

  0%|          | 0/1 [00:00<?, ?it/s]

Retrying langchain_community.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the ChatCompletions_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 10 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain_community.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the ChatCompletions_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 5 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain_community.chat_models.openai.ChatOpenAI.comp


Observation: [36;1m[1;3mTree of Thoughts is a method for exploring multiple reasoning possibilities in LLM-powered autonomous agents. It decomposes problems into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS or DFS with each state evaluated by a classifier or majority vote. Task decomposition can be done by LLM with simple prompting, task-specific instructions, or with human inputs.[0m
Thought:

Retrying langchain_community.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the ChatCompletions_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 5 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain_community.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the ChatCompletions_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 1 second. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..


[32;1m[1;3m{
    "action": "Final Answer",
    "action_input": "Tree of Thoughts is a method for exploring multiple reasoning possibilities in LLM-powered autonomous agents. It involves decomposing problems into multiple thought steps and generating multiple thoughts per step, creating a tree structure. The search process can be BFS or DFS with each state evaluated by a classifier or majority vote. Task decomposition can be done by LLM with simple prompting, task-specific instructions, or with human inputs."
}[0m

[1m> Finished chain.[0m


'Tree of Thoughts is a method for exploring multiple reasoning possibilities in LLM-powered autonomous agents. It involves decomposing problems into multiple thought steps and generating multiple thoughts per step, creating a tree structure. The search process can be BFS or DFS with each state evaluated by a classifier or majority vote. Task decomposition can be done by LLM with simple prompting, task-specific instructions, or with human inputs.'

## adding a search

In [27]:
# %pip install --upgrade --quiet  duckduckgo-search

Note: you may need to restart the kernel to use updated packages.


In [24]:
from langchain.tools import DuckDuckGoSearchRun, DuckDuckGoSearchResults
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper

wrapper = DuckDuckGoSearchAPIWrapper(time="y", max_results=3)
search = DuckDuckGoSearchResults(api_wrapper=wrapper)


In [25]:
# test search
search.run("What is the best programming language for ML?")

"[snippet: Lower-level languages (like R, C++, or Java) offer greater speed but are harder to learn. Higher-level languages (like JavaScript and Python) are easier to use but slower to execute. Python is a key language for machine learning and data analytics. For speed-to-competence and breadth of application, it's probably the best one for beginners., title: What's the Best Language for Machine Learning? - CareerFoundry, link: https://careerfoundry.com/en/blog/data-analytics/best-machine-learning-languages/], [snippet: Most Popular Programming Languages & Why They're Useful in Machine Learning - neptune.ai Online forums and data science blogs have plenty of recommendations: learn that language, keep up with that framework, have you looked at that library?, title: Most Popular Programming Languages & Why They're Useful in Machine ..., link: https://neptune.ai/blog/programming-languages-machine-learning], [snippet: Contents 1. Best programming languages for machine learning in 2023 2. 1

In [26]:
tools = [
    Tool(
        name='Knowledge Base',
        func=qa.run,
        description=(
            'First tool to use when answering questions about LLM, LLM apps and the LLM frameworks'
        )
    ),
    Tool(
        name='Web Search',
        func=search.run,
        description=(
            'Tool to use for generic LLMs, machine learning and programming related questions'
        )
    )
]

In [27]:
agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=llm,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=conversational_memory,
    handle_parsing_errors=True
)

In [28]:
sys_msg = """Assistant is a large language model.

Assistant is designed to be able to assist with a wide range of tasks related to LLMs, machine learning and programming , \
from answering simple questions to providing in-depth explanations, example code and links to documentation. \
As a language model, Assistant is able to generate human-like text based on the input it receives, \
allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.
Assistant can use code spippets and URL links.

Assistant is constantly learning and improving, and its capabilities are constantly evolving. \
It is able to process and understand large amounts of text, and can use this knowledge to provide accurate \
and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input \
it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics related to LLMs, machine learning and programming.

If the question is related to LLMs, machine learning and programming Assistant has to first use the Knowledge Base. \
If no relevant answer is found in the Knowlege Base, Assistant can use Web Search tool to find an answer on the internet. \

If the question is not related to LLMs, machine learning and programming, Assistant has to respond: \
I'm sorry this is not my area of expertise.

"""

In [29]:
new_prompt = agent.agent.create_prompt(
    system_message=sys_msg,
    tools=tools
)

agent.agent.llm_chain.prompt = new_prompt

test

In [31]:
agent.run("How can Tree of Thoughts be used in LLM apps?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m{
    "action": "Knowledge Base",
    "action_input": "How can Tree of Thoughts be used in LLM apps?"
}[0m

  0%|          | 0/1 [00:00<?, ?it/s]

Retrying langchain_community.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the ChatCompletions_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 9 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..


RateLimitError: Requests to the ChatCompletions_Create Operation under Azure OpenAI API version 2023-09-01-preview have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 5 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit.

In [34]:
agent.run("What is the best programming language for ML?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m{
    "action": "Web Search",
    "action_input": "Best programming language for machine learning"
}[0m
Observation: [33;1m[1;3m[snippet: 1. Python What is Python? Unambiguously, Python is the best language for machine learning. It's an interpreted language that is easy to read and understand, as its syntax is relatively similar to spoken languages. Because of this, it is also one of the easiest languages to learn., title: The Best Programming Languages for Machine Learning, link: https://tripleten.com/blog/posts/the-best-programming-languages-for-machine-learning], [snippet: Lower-level languages (like R, C++, or Java) offer greater speed but are harder to learn. Higher-level languages (like JavaScript and Python) are easier to use but slower to execute. Python is a key language for machine learning and data analytics. For speed-to-competence and breadth of application, it's probably the best one for beginners., title: Wh

"Python is widely considered the best programming language for machine learning due to its ease of use and readability. Other popular languages in the field include R, Java, and JavaScript. However, Python's popularity, large community, and extensive libraries make it a great choice for beginners and experts alike."

# Hello world search agent below...

basic search

In [28]:
from langchain.tools import DuckDuckGoSearchRun

In [29]:
search = DuckDuckGoSearchRun()

In [30]:
search.run("Obama's first name?")

"August 4, 1961, Honolulu, Hawaii, U.S. (age 62) Title / Office: presidency of the United States of America (2009-2017), United States United States Senate (2005-2008), United States (Show more) Political Affiliation: Democratic Party Awards And Honors: Nobel Prize (2009) Grammy Award (2007) Grammy Award (2005) Life After the Presidency How Tall is Obama? Books and Grammy Hobbies Movies About Obama Quotes 1961-present Who Is Barack Obama? Barack Obama was the 44 th president of the United States... As the head of the government of the United States, the president is arguably the most powerful government official in the world. The president is elected to a four-year term via an electoral college system. Since the Twenty-second Amendment was adopted in 1951, the American presidency has been 2023. 3. Former President Barack Obama, born on Aug. 4, 1961, in Honolulu, Hawaii, made a name for himself during his decades spent in politics starting in 1997 as an Illinois State Senator ... His na

with links

In [31]:
from langchain.tools import DuckDuckGoSearchResults

In [32]:
search = DuckDuckGoSearchResults()

In [33]:
search.run("Obama")

"[snippet: Barack Obama (born August 4, 1961, Honolulu, Hawaii, U.S.) 44th president of the United States (2009-17) and the first African American to hold the office. Before winning the presidency, Obama represented Illinois in the U.S. Senate (2005-08). He was the third African American to be elected to that body since the end of Reconstruction (1877)., title: Barack Obama | Biography, Parents, Education, Presidency, Books ..., link: https://www.britannica.com/biography/Barack-Obama], [snippet: Learn about the life and achievements of Barack Obama, the first Black commander-in-chief of the United States. From his early years in Hawaii and Indonesia to his political career in Illinois and Washington, D.C., discover his family background, education, marriage, hobbies, and legacy., title: Barack Obama: Biography, 44th U.S. President, Politician, link: https://www.biography.com/political-figures/barack-obama], [snippet: Barack Obama is sworn in by Chief Justice John Roberts as the 44th pr

just news

In [34]:
search = DuckDuckGoSearchResults(backend="news")
search.run("Obama")

'[snippet: There is no credible evidence Obama was arrested. Carlson said he never reported it happened and does not have an account on the platform in question., title: Obama wasn\'t arrested. Tucker Carlson didn\'t post a video about it, either | Fact check, link: https://www.msn.com/en-us/news/us/obama-wasnt-arrested-tucker-carlson-didnt-post-a-video-about-it-either-fact-check/ar-BB1hdjnl, date: 2024-01-24T22:58:43+00:00, source: USA Today on MSN.com], [snippet: The former first daughter\'s short, "The Heart," focuses on a sensitive man racked with guilt when his mother dies after they have an argument., title: Malia Obama at Sundance: A Fledgling Filmmaker Makes Her Debut, link: https://www.nytimes.com/2024/01/26/movies/malia-obama-sundance-film-festival.html, date: 2024-01-26T14:45:00+00:00, source: The New York Times], [snippet: The world is not just about snuggling with housebroken puppies and rainbows out your window and riding unicorns bareback.\xa0 The real world can be terri

wrapper with more control

In [35]:
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper

wrapper = DuckDuckGoSearchAPIWrapper(region="de-de", time="d", max_results=2)

In [36]:
search = DuckDuckGoSearchResults(api_wrapper=wrapper, source="news")
search.run("Obama")

'[snippet: Aktuell gibt es Gerüchte, dass die Demokraten deswegen eine andere Kandidatin gegen Donald Trump ins Rennen schicken könnten. Wie das Nachrichtenportal "oe24.at" berichtet, wird Michelle Obama derzeit als Biden-Nachfolgerin gehandelt. Die Ehefrau von Ex-Präsident Barack Obama hat eine Kandidatur zwar bislang immer wieder verneint, doch Spekulationen gibt es dennoch immer wieder., title: Joe Biden: US-Präsident abgestürzt! Michelle Obama soll seine ..., link: https://www.news.de/politik/857545585/joe-biden-stuerzt-ab-im-politiker-ranking-keine-zweite-amtzeit-michelle-obama-als-praesidentin-geruechte-um-kandidatur-us-wahl-2024/1/], [snippet: Aber auch in unzähligen Privathäusern hat Rissel TV-Geräte verschwinden lassen, die bei Bedarf wieder hinter Bildern auftauchen, aus der Decke oder aus Schrankelementen herausfahren. Seit zwei Jahren nutzt auch Ex-US-Präsident Barack Obama in seiner Villa im kalifornischen Santa Monica ein TV-Lift-System von Sascha Rissel., title: Auch Bar