In [1]:
import os

import bs4
from dotenv import load_dotenv

# Load .env BEFORE importing the LangChain packages. The "USER_AGENT
# environment variable not set" warning is emitted while the imports below
# run, so a USER_AGENT (or any other setting) defined in .env is only visible
# to them if the file has already been loaded at that point.
dotenv_loaded = load_dotenv()

from langchain import hub  # access to prompts published on the LangChain Hub
from langchain_chroma import Chroma  # Chroma vector store integration
from langchain_community.document_loaders import WebBaseLoader  # loads web pages as documents
from langchain_core.output_parsers import StrOutputParser  # turns the model reply into a plain string
from langchain_core.runnables import RunnablePassthrough  # forwards its input unchanged
from langchain_openai import ChatOpenAI  # OpenAI chat model used to generate the answer
from langchain_openai import OpenAIEmbeddings  # embedding model: converts text into vectors
from langchain_text_splitters import RecursiveCharacterTextSplitter  # splits long text into smaller chunks

# Keep showing the result of load_dotenv() as the cell output, as before.
dotenv_loaded


USER_AGENT environment variable not set, consider setting it to identify your requests.


True

In [2]:

# Chat model that will write the final answer.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

# Fetch the blog post, keeping only the elements whose CSS class marks the
# post title, header and body; everything else on the page is not parsed.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header"),
        ),
    },
)
docs = loader.load()

In [3]:

# Cut the loaded post into chunks of up to 1000 characters, with 200
# characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Give every chunk a stable, position-based id. Without explicit ids each run
# of this cell stores the chunks under freshly generated ids, so re-running it
# in the same kernel can add the same chunks to the collection again and the
# retriever then returns repeated context. With fixed ids a re-run overwrites
# the existing entries instead.
split_ids = [f"agent-post-chunk-{index}" for index in range(len(splits))]
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
    ids=split_ids,
)

# Retrieve and generate using the relevant snippets of the blog.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")

In [4]:

def format_docs(docs):
    """Merge the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values, in order, separated by a blank line.
        An empty iterable yields an empty string.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)


# Inputs for the prompt: "context" is the retrieved documents run through
# format_docs, "question" is the text passed to the chain, forwarded unchanged.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Pipeline: build the inputs -> fill the prompt -> call the chat model ->
# reduce the model reply to a plain string.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

rag_chain.invoke("What is Task Decomposition?")

'Task decomposition is a technique that breaks down complex tasks into smaller and simpler steps. It allows the model to think step by step and transform big tasks into more manageable ones. This process can be done through simple prompting, task-specific instructions, or with human inputs.'