# RAG Components

In [1]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from operator import itemgetter
from dotenv import load_dotenv




In [3]:
# 1. Load the information
# Restrict HTML parsing to elements carrying these CSS classes.
content_filter = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": content_filter},
)
docs = loader.load()

# 2a. Split the documents into smaller, overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# 2b. Select the embedding strategy/type
openai_embedding = OpenAIEmbeddings()

# 2c. Create the vectorstore from the chunks and the chosen embedding
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=openai_embedding,
)

# 3. Create the retriever
retriever = vectorstore.as_retriever()

# Define the prompt (not unique to RAG).
# Written as adjacent literals so the trailing space before the newline
# stays visible; {context} and {question} are the template variables.
prompt = (
    "Based on the data provided to you here: {context}. \n"
    "Please answer this question: {question}"
)

custom_prompt = PromptTemplate.from_template(prompt)

def format_docs(docs):
    """Join the ``page_content`` of every document in *docs* into one string.

    Consecutive documents are separated by a blank line; an empty iterable
    produces an empty string.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

# Define the chain (not unique to RAG)
# The incoming question is sent to the retriever (whose documents are
# flattened to text by format_docs) and is also passed through unchanged,
# giving the prompt both of its variables.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
answer_parser = StrOutputParser()

rag_chain = chain_inputs | custom_prompt | llm | answer_parser

# Left as a bare expression so the notebook displays the answer.
rag_chain.invoke('What is LLM Powered Autonomous Agents')

'LLM Powered Autonomous Agents refers to autonomous agent systems where a large language model (LLM) serves as the core controller or brain of the agent. These agents are designed to break down complex tasks into smaller subgoals, reflect on past actions, and use natural language interfaces to interact with external components such as memory and tools. However, the reliability of model outputs from LLMs in these systems can be questionable, as they may make formatting errors or exhibit rebellious behavior. The potential of LLM in autonomous agents goes beyond generating text or code, as it can be a powerful problem solver.'