# RAG with Agents
Adapted from: https://python.langchain.com/docs/use_cases/question_answering/conversational_retrieval_agents

In [1]:
import os
import openai
import bs4
from langchain import hub
from langchain.chat_models import ChatOpenAI, AzureChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings, AzureOpenAIEmbeddings
from langchain.schema import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
# from langchain_core.runnables import RunnablePassthrough
from langchain.schema.runnable  import RunnablePassthrough
from ssl_workaround import no_ssl_verification
from dotenv import load_dotenv, find_dotenv

# 1. Init

In [2]:
def init():
    """Load OpenAI / Azure OpenAI settings from the environment into the `openai` module.

    Reads the local .env file, checks that every environment variable this
    notebook relies on is present, copies the API settings onto the `openai`
    module, and prints the configured deployment names.

    Raises:
        KeyError: if one or more required variables are missing. The message
            lists all of them, and `openai` is left unmodified in that case.
    """
    _ = load_dotenv(find_dotenv())  # read local .env file

    # Attribute on the `openai` module -> environment variable that supplies it.
    openai_settings = {
        'api_key': 'OPENAI_API_KEY',
        'api_base': 'OPENAI_API_BASE',
        'api_type': 'OPENAI_API_TYPE',
        'api_version': 'OPENAI_API_VERSION',
    }
    # Deployment names are only reported here; later cells read them from os.environ.
    deployment_vars = ('OPENAI_DEPLOYMENT_ID_LLM', 'OPENAI_DEPLOYMENT_ID_EMBED')

    # Validate everything up front so a missing variable is reported clearly
    # and cannot leave the `openai` module half-configured.
    required = list(openai_settings.values()) + list(deployment_vars)
    missing = [name for name in required if name not in os.environ]
    if missing:
        raise KeyError('Missing required environment variables: ' + ', '.join(missing))

    for attribute, env_var in openai_settings.items():
        setattr(openai, attribute, os.environ[env_var])

    print(f'Openai secrets loaded, models: {os.environ["OPENAI_DEPLOYMENT_ID_LLM"]}, {os.environ["OPENAI_DEPLOYMENT_ID_EMBED"]}')


init()

Openai secrets loaded, models: gpt-35-turbo, text-embedding-ada-002


# 2. Load from Web

In [3]:
# Load the blog post into documents (one document per URL).
# The SoupStrainer restricts parsing to elements carrying the
# post-content, post-title, or post-header CSS classes.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header"),
        ),
    ),
)
docs = loader.load()

In [4]:
# Sanity check of the load step: document count, size of the first
# document, and a 500-character preview of its text.
print(f'Nuber of docs loaded: {len(docs)}')
first_page_text = docs[0].page_content
print(f'First doc lenght: {len(first_page_text)}')
print(f'Sample: {first_page_text[:500]}')

Nuber of docs loaded: 1
First doc lenght: 42824
Sample: 

      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In


# 3. Split
- LLMs struggle to find information in a very long context, so we split the text into reasonably sized documents (chunks).
- It's good to have some overlap between the split documents so that context is not lost at the beginning / end of each chunk.

In [5]:
# Split the loaded documents into overlapping chunks.
# add_start_index=True records each chunk's offset in its metadata
# (it appears as 'start_index' in the metadata sample printed below).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
all_splits = text_splitter.split_documents(docs)

In [6]:
# Sanity check of the split step: chunk count, plus the length and
# metadata of the first chunk.
print(f'Number of documents created: {len(all_splits)}')
first_split = all_splits[0]
print(f'First document length: {len(first_split.page_content)}')
print(f'Metadata sample: {first_split.metadata}')

Number of documents created: 66
First document length: 969
Metadata sample: {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 8}


# 4. Vector Store

In [7]:
# Embedding model; the Azure deployment name comes from the environment.
embedding = AzureOpenAIEmbeddings(
    azure_deployment=os.environ['OPENAI_DEPLOYMENT_ID_EMBED'],
)

In [8]:
# Build the Chroma vector store from the split documents, using the
# embedding model configured above.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=embedding,
)

# Retriever tool

In [9]:
# Expose the vector store as a retriever: similarity search, k=3 per query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [10]:
# Retriever smoke test: run one query, then print the number of hits and
# the length and text of the first two retrieved documents.
sample_query = "What are the approaches to Task Decomposition?"
retrieved_docs = retriever.get_relevant_documents(sample_query)
print(f'---Number of documents retrieved: {len(retrieved_docs)}')

doc0_text = retrieved_docs[0].page_content
print(f'---Doc 0 length: {len(doc0_text)}')
print(f'---Doc 0: {doc0_text}')

doc1_text = retrieved_docs[1].page_content
print(f'---Doc 1 length: {len(doc1_text)}')
print(f'---Doc 1: {doc1_text}')

---Number of documents retrieved: 3
---Doc 0 length: 644
---Doc 0: Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.
Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.
---Doc 1 length: 606
---Doc 1: Fig. 1. Overview of a LLM-powered autonomous agent system.
Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a

In [11]:
from langchain.agents.agent_toolkits import create_retriever_tool

In [12]:
# Wrap the retriever as an agent tool. The name and description are the
# strings handed to create_retriever_tool alongside the retriever.
tool_name = "LLM_agents_blog"
tool_description = "Searches and returns documents regarding LLMs and autonomous agents."
tool = create_retriever_tool(retriever, tool_name, tool_description)

# The agent constructor below takes a list of tools; here it holds just this one.
tools = [tool]

# Create Agent

In [13]:
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent

In [14]:
# Chat model for the agent; the Azure deployment name comes from the environment.
llm = AzureChatOpenAI(
    azure_deployment=os.environ['OPENAI_DEPLOYMENT_ID_LLM'],
)

In [15]:
# Assemble the conversational retrieval agent from the chat model and the
# retriever tool list. verbose=True is passed through to the agent
# (presumably enables step-by-step logging — confirm against the LangChain docs).
agent_executor = create_conversational_retrieval_agent(
    llm,
    tools,
    verbose=True,
)

In [17]:
# Disabled: first agent call (a simple greeting), wrapped in the SSL-verification
# workaround. Left commented out because, per the note at the end of this
# section, the agent calls currently raise errors.
# NOTE(review): if this is re-enabled, the bare `result` inside the `with` block
# will not be displayed as cell output; move it after the block.
# with no_ssl_verification():
#     result = agent_executor({"input": "hi, im bob"})
#     result

In [18]:
# Disabled: agent query that should be answered from the retriever tool.
# Left commented out because, per the note at the end of this section,
# the agent calls currently raise errors.
# result = agent_executor(
#     {
#         "input": "How can the reflextion framework be used in LLM apps?"
#     }
# )

All of the agent calls above raise errors due to an unknown issue between Azure OpenAI and LangChain.

# 