In [1]:
%%capture --no-stderr
%pip install langchain langchain_community sentence-transformers langchainhub
%pip install -qU langchain-google-vertexai

Set environment keys

In [2]:
import getpass
import os

def _prompt_for_secret(name: str) -> None:
    """Ask for the secret `name` and store it in the process environment.

    The prompt is labeled with the variable name so the user knows which key
    is being requested. A value that is already set is kept, which lets the
    cell be re-run without typing every key again.
    """
    if not os.environ.get(name):
        os.environ[name] = getpass.getpass(f"{name}: ")


# Non-secret configuration.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "langchain-rag"
os.environ["USER_AGENT"] = "myagent"

# Secrets are requested interactively, in the same order as before.
for _secret_name in ("LANGCHAIN_API_KEY", "GOOGLE_API_KEY", "TAVILY_API_KEY"):
    _prompt_for_secret(_secret_name)

Load document

In [3]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Fetch the blog post. Parsing is restricted to elements whose CSS class is
# "post-content", "post-title" or "post-header".
post_urls = ("https://lilianweng.github.io/posts/2023-06-23-agent/",)
content_filter = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=post_urls,
    bs_kwargs={"parse_only": content_filter},
)
docs = loader.load()

Split document into chunks

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of up to 1000 characters with a
# 200-character overlap; add_start_index is enabled on the splitter.
splitter_options = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "add_start_index": True,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
all_splits = text_splitter.split_documents(docs)

Create collection and embeddings

In [5]:
# from langchain_chroma import Chroma
from chromadb import Client
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer

# vectorstore = Chroma.from_documents(
#     documents,
#     embedding=OpenAIEmbeddings(),
# )

# Initialize ChromaDB.
# get_or_create_collection keeps this cell re-runnable in the same kernel:
# create_collection raises when the named collection already exists.
client = Client()
collection = client.get_or_create_collection("lilianweng_collection")

# Prepare document contents for embedding
document_texts = [doc.page_content for doc in all_splits]

# Initialize a sentence-transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Get embeddings for the documents
document_embeddings = model.encode(document_texts)

# Store every chunk in ChromaDB with a single batched call instead of one
# call per chunk. upsert overwrites entries that share an id, so re-running
# the cell refreshes the stored chunks rather than colliding with them.
# The guard skips the call when there is nothing to store.
if document_texts:
    collection.upsert(
        ids=[f"doc_{i}" for i in range(len(all_splits))],
        embeddings=document_embeddings.tolist(),
        metadatas=[doc.metadata for doc in all_splits],
        documents=document_texts,
    )

  from tqdm.autonotebook import tqdm, trange


Create retriever

In [6]:
from langchain_core.runnables import RunnableLambda

def _query_collection(query):
    """Embed `query` with the sentence-transformer model and return the
    collection's query result for the 6 nearest stored chunks."""
    query_vectors = model.encode([query])
    return collection.query(query_embeddings=query_vectors, n_results=6)


retriever = RunnableLambda(_query_collection)

Define LLM model

In [7]:
from langchain_google_vertexai import ChatVertexAI

# Chat model that generates the final answer in the chain.
llm_model_name = "gemini-1.5-flash"
llm = ChatVertexAI(model=llm_model_name)

Create RAG prompt

In [8]:
from langchain import hub

# Pull the shared RAG prompt template from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

# Render the template with placeholder values to preview the final wording.
filler_inputs = {"context": "filler context", "question": "filler question"}
example_messages = prompt.invoke(filler_inputs).to_messages()

first_message = example_messages[0]
print(first_message.content)

You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: filler question 
Context: filler context 
Answer:


Chain the modules together

In [9]:
from langchain_core.runnables import RunnablePassthrough

def _format_context(query_result):
    """Reduce a Chroma query result to the plain text of the retrieved chunks.

    The retriever returns the raw query result mapping. Passing that mapping
    straight to the prompt would render its whole repr (ids, distances,
    metadata, nested lists) into the context slot. Only the chunk texts for
    the single query are kept, separated by blank lines.

    Returns an empty string when the result carries no documents.
    """
    # "documents" holds one list of texts per query embedding; one query is sent.
    document_batches = query_result.get("documents") or [[]]
    return "\n\n".join(document_batches[0])


# The question is passed to the prompt unchanged, and is also sent through the
# retriever and formatted to build the context.
rag_chain = (
    {"context": retriever | _format_context, "question": RunnablePassthrough()}
    | prompt
    | llm
)

Chat example

In [10]:
# Ask a question and print the model's answer text.
question = "What is Task Decomposition?"
response = rag_chain.invoke(question)
print(response.content)

Task decomposition is the process of breaking down a complex task into smaller, more manageable steps. This can be done through prompting techniques like Chain of Thought (CoT) or Tree of Thoughts (ToT), which guide the model to think step-by-step. Task decomposition can also be achieved through task-specific instructions or human inputs. 



In [11]:
# Ask a second question through the same chain and print the answer text.
question = "Tell me about dog"
response = rag_chain.invoke(question)
print(response.content)

I'm sorry, but the provided context doesn't contain any information about dogs.  I can't answer your question. 

