In [1]:
import os
from getpass import getpass

# Never paste the key into the notebook: read it from the GOOGLE_API_KEY
# environment variable, and fall back to a hidden interactive prompt when the
# variable is unset or empty. The imports live in this cell because it runs
# before the notebook's main import cell.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or getpass("Google API key: ")

In [2]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI

import warnings
# Blanket suppression: no category or module filter is given, so every warning
# (including deprecation notices from the libraries imported above) is hidden
# until the filters are changed again.
warnings.filterwarnings("ignore")

In [3]:
# Load the blog post, parsing only the HTML elements whose CSS class is one of
# the three listed below.
relevant_classes = ("post-content", "post-title", "post-header")
html_filter = bs4.SoupStrainer(class_=relevant_classes)
loader = WebBaseLoader(
    web_path=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": html_filter},
)
docs = loader.load()

# Break the loaded documents into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Build a Chroma vector store from the chunks using the default
# HuggingFaceEmbeddings configuration.
embedding_model = HuggingFaceEmbeddings()
vector_store = Chroma.from_documents(documents=splits, embedding=embedding_model)

# Expose the store through the retriever interface used by the RAG chain.
retriever = vector_store.as_retriever()

In [4]:
# Prompt template fetched from the LangChain Hub entry "rlm/rag-prompt".
prompt = hub.pull("rlm/rag-prompt")

# Keyword arguments for the Gemini chat model, gathered in one mapping and
# unpacked into the constructor.
llm_kwargs = dict(
    model="gemini-pro",
    google_api_key=GOOGLE_API_KEY,
    temperature=0,
    convert_system_message_to_human=True,
    max_retries=2,
)
llm = ChatGoogleGenerativeAI(**llm_kwargs)

def format_docs(docs):
    """Join the ``page_content`` of every document in ``docs`` into one string.

    Consecutive documents are separated by a blank line; an empty iterable
    yields an empty string.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

# Inputs for the prompt: "context" is the retriever's result piped through
# format_docs, and "question" is the chain input passed along unchanged.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Pipeline: build prompt inputs -> fill the prompt -> call the LLM -> parse the
# reply with StrOutputParser.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

rag_chain.invoke("What is Task Decomposition")

'Task decomposition is the process of breaking down a complex task into smaller, more manageable steps. This can be done using a variety of methods, including LLM prompting, task-specific instructions, or human input. Task decomposition can help to make complex tasks more manageable and easier to complete.'