In [1]:
%%capture --no-stderr
# Install the notebook's dependencies into the kernel's environment.
# %%capture hides pip's stdout for this cell; --no-stderr leaves stderr visible.
# Only chromadb is pinned (0.5.3); the other packages are installed unpinned.
%pip install langchain chromadb==0.5.3 sentence_transformers langchain_community
# Provides `langchain_google_vertexai.ChatVertexAI`, imported later for the LLM.
%pip install langchain-google-vertexai

Set environment variables (API keys)

In [2]:
import getpass
import os


def _set_env(name: str) -> None:
    """Export the secret ``name`` into ``os.environ``, prompting only if needed.

    If the variable is already set to a non-empty value it is left untouched,
    so re-running this cell (or exporting the key before starting Jupyter)
    does not trigger another prompt. Otherwise the user is asked for the value
    with a prompt that names the variable, so consecutive prompts can be told
    apart.

    Args:
        name: Name of the environment variable to populate.
    """
    if not os.environ.get(name):
        os.environ[name] = getpass.getpass(f"{name}: ")


# LangSmith tracing configuration.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
_set_env("LANGCHAIN_API_KEY")

# NOTE(review): ChatVertexAI normally authenticates through Google Cloud
# application-default credentials rather than GOOGLE_API_KEY, and the only
# OpenAI usage visible in this notebook is commented out. Both keys are kept
# for backward compatibility -- confirm whether they are still required.
_set_env("GOOGLE_API_KEY")
_set_env("OPENAI_API_KEY")

Create documents

In [3]:
from langchain_core.documents import Document

# Toy corpus as (text, source tag) pairs; each pair becomes one Document whose
# metadata carries the tag under the "source" key.
_PET_FACTS = (
    ("Dogs are great companions, known for their loyalty and friendliness.", "mammal-pets-doc"),
    ("Cats are independent pets that often enjoy their own space.", "mammal-pets-doc"),
    ("Goldfish are popular pets for beginners, requiring relatively simple care.", "fish-pets-doc"),
    ("Parrots are intelligent birds capable of mimicking human speech.", "bird-pets-doc"),
    ("Rabbits are social animals that need plenty of space to hop around.", "mammal-pets-doc"),
)

documents = [
    Document(page_content=fact, metadata={"source": origin})
    for fact, origin in _PET_FACTS
]

Create the collection and embed the documents

In [4]:
# Alternative (not used here): langchain_chroma's
# `Chroma.from_documents(documents, embedding=...)` builds a vector store in
# one call; this notebook drives ChromaDB directly instead.
from chromadb import Client
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer

COLLECTION_NAME = "pets_collection"
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# Initialize ChromaDB with its default client settings.
client = Client()
# get_or_create_collection (rather than create_collection) keeps this cell
# re-runnable: create_collection raises if the collection already exists in
# the running kernel.
collection = client.get_or_create_collection(COLLECTION_NAME)

# Raw text of every document, in the same order as `documents`.
document_texts = [doc.page_content for doc in documents]

# Sentence-transformer model used for both document and query embeddings.
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# One embedding row per document text.
document_embeddings = model.encode(document_texts)

# Store everything in a single batched call. `upsert` overwrites entries whose
# id already exists, so re-running the cell refreshes the collection instead
# of attempting duplicate inserts. Ids follow the document order: doc_0, doc_1, ...
collection.upsert(
    ids=[f"doc_{i}" for i in range(len(documents))],
    embeddings=document_embeddings.tolist(),
    metadatas=[doc.metadata for doc in documents],
    documents=document_texts,
)

  from tqdm.autonotebook import tqdm, trange


Example document similarity query

In [5]:
# Example query text
query_text = "dog"

# Number of nearest documents to retrieve
N_RESULTS = 3

# Embed the query with the same model used for the documents. A one-element
# list is passed so the result has one row per query.
query_embedding = model.encode([query_text])

# Search for the nearest documents
results = collection.query(
    query_embeddings=query_embedding.tolist(),
    n_results=N_RESULTS,
)

# Each field of `results` holds one inner list per query; [0] selects the
# single query issued above. The numeric value Chroma returns is a *distance*,
# not a similarity: smaller means a closer match, which is why results come
# back in ascending order.
for doc, meta, distance in zip(results["documents"][0], results["metadatas"][0], results["distances"][0]):
    print("Document:", doc)
    print("Metadata:", meta)
    print("Distance (lower = more similar):", distance)
    print()

Document: Dogs are great companions, known for their loyalty and friendliness.
Metadata: {'source': 'mammal-pets-doc'}
Similarity Score: 1.1311126947402954

Document: Cats are independent pets that often enjoy their own space.
Metadata: {'source': 'mammal-pets-doc'}
Similarity Score: 1.5269932746887207

Document: Goldfish are popular pets for beginners, requiring relatively simple care.
Metadata: {'source': 'fish-pets-doc'}
Similarity Score: 1.6571111679077148



Create retriever

In [6]:
from langchain_core.runnables import RunnableLambda


def _query_top_match(query: str) -> dict:
    """Return Chroma's raw query result for the single closest document.

    Args:
        query: Free-text query; it is embedded with the notebook's `model`.

    Returns:
        The dict produced by `collection.query` with `n_results=1`
        (keys such as 'ids', 'distances', 'metadatas', 'documents').
    """
    query_embedding = model.encode([query])
    # Convert to plain lists, matching how the example query cell calls Chroma.
    return collection.query(query_embeddings=query_embedding.tolist(), n_results=1)


# Wrapping the function in a RunnableLambda gives it the Runnable interface
# (.invoke / .batch) so it can be composed into a chain.
retriever = RunnableLambda(_query_top_match)

retriever.batch(["cat", "dog"])

[{'ids': [['doc_1']],
  'distances': [[0.935105562210083]],
  'metadatas': [[{'source': 'mammal-pets-doc'}]],
  'embeddings': None,
  'documents': [['Cats are independent pets that often enjoy their own space.']],
  'uris': None,
  'data': None,
  'included': ['metadatas', 'documents', 'distances']},
 {'ids': [['doc_0']],
  'distances': [[1.1311126947402954]],
  'metadatas': [[{'source': 'mammal-pets-doc'}]],
  'embeddings': None,
  'documents': [['Dogs are great companions, known for their loyalty and friendliness.']],
  'uris': None,
  'data': None,
  'included': ['metadatas', 'documents', 'distances']}]

Define the LLM and the prompt, then chain all the modules together

In [7]:
from langchain_google_vertexai import ChatVertexAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

llm = ChatVertexAI(model="gemini-1.5-flash")

# Prompt with two placeholders: the user's question and the retrieved context.
message = """
Answer this question using the provided context only.

{question}

Context:
{context}
"""

prompt = ChatPromptTemplate.from_messages([("human", message)])


def _format_context(result: dict) -> str:
    """Reduce a raw Chroma query result to the retrieved document text.

    The retriever returns Chroma's full result dict (ids, distances,
    metadatas, ...). Interpolating that dict into the prompt would feed the
    model bookkeeping fields instead of clean context, so only the document
    texts are kept.

    Args:
        result: Dict returned by `collection.query`; its 'documents' entry is
            a list with one inner list of texts per query.

    Returns:
        The document texts joined by blank lines ('' if there are none).
    """
    hits_per_query = result.get("documents") or []
    return "\n\n".join(text for hits in hits_per_query for text in hits)


# The input string fans out to both branches: it is embedded and looked up for
# the context, and passed through unchanged as the question. Piping the
# retriever into a plain function relies on LangChain coercing callables into
# runnables.
rag_chain = {"context": retriever | _format_context, "question": RunnablePassthrough()} | prompt | llm

Example chat

In [8]:
# Send one question through the whole chain (retrieve -> prompt -> LLM) and
# show only the text of the model's reply.
question = "Why people love dogs?"
response = rag_chain.invoke(question)

print(response.content)

Retrying langchain_google_vertexai.chat_models._completion_with_retry.<locals>._completion_with_retry_inner in 4.0 seconds as it raised ResourceExhausted: 429 Quota exceeded for aiplatform.googleapis.com/generate_content_requests_per_minute_per_project_per_base_model with base model: gemini-1.5-flash. Please submit a quota increase request. https://cloud.google.com/vertex-ai/docs/generative-ai/quotas-genai..
Retrying langchain_google_vertexai.chat_models._completion_with_retry.<locals>._completion_with_retry_inner in 4.0 seconds as it raised ResourceExhausted: 429 Quota exceeded for aiplatform.googleapis.com/generate_content_requests_per_minute_per_project_per_base_model with base model: gemini-1.5-flash. Please submit a quota increase request. https://cloud.google.com/vertex-ai/docs/generative-ai/quotas-genai..
Retrying langchain_google_vertexai.chat_models._completion_with_retry.<locals>._completion_with_retry_inner in 4.0 seconds as it raised ResourceExhausted: 429 Quota exceeded fo

According to the provided context, people love dogs because they are:

* **Great companions:** This suggests dogs provide companionship and emotional support.
* **Known for their loyalty:** Dogs are often seen as faithful and devoted friends.
* **Friendly:** Dogs are generally known for their sociable and pleasant nature. 

