In [21]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies OPENAI_API_KEY for the OpenAI
# clients created in later cells — confirm against the project's .env.
from dotenv import load_dotenv
load_dotenv()

True

In [22]:
import bs4
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [23]:
# Chat model used as the generation step of the RAG chain.
CHAT_MODEL_NAME = "gpt-3.5-turbo"
llm = ChatOpenAI(model=CHAT_MODEL_NAME)

In [24]:
# Only parse elements carrying one of these CSS classes (the post's body,
# title and header); everything else on the page is skipped by BeautifulSoup.
post_strainer = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)

# `web_paths` takes a sequence of URLs, so the single URL is passed as a
# one-element tuple (note the trailing comma — without it the parentheses
# would just wrap a plain string).
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_strainer},
)

# Fetch the page and return it as a list of LangChain documents.
docs = loader.load()

In [25]:
# Split the loaded documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Embed every chunk with OpenAI embeddings and index it in a Chroma store.
# NOTE(review): re-running this cell in a live kernel may index the same
# chunks again — confirm whether the in-memory collection is reused.
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embedding_model,
)
retriever = vectorstore.as_retriever()

# Pull the RAG prompt template published on the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

In [26]:
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in iteration order, separated by a blank
        line; an empty string when ``docs`` yields nothing.
    """
    separator = "\n\n"
    contents = [item.page_content for item in docs]
    return separator.join(contents)

In [29]:
# Inputs for the prompt: "context" is the retriever's hits flattened to text
# by format_docs; "question" is the user's input passed through unchanged.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# inputs -> prompt template -> chat model -> plain-string answer
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

In [30]:
# Run the full RAG chain on a single question.
question = "define planning in 3 sentence"
response = rag_chain.invoke(question)

In [31]:
# Show the generated answer as the cell's output.
response

'Planning involves utilizing an external classical planner to do long-horizon planning, using the Planning Domain Definition Language (PDDL) as an intermediate interface to describe the planning problem. The process includes translating the problem into "Problem PDDL," requesting a classical planner to generate a PDDL plan based on an existing "Domain PDDL," and translating the PDDL plan back into natural language. Essentially, planning is outsourced to an external tool with domain-specific PDDL and a suitable planner, commonly seen in certain robotic setups.'