## Vector Store Retriver 

In [1]:
from langchain_core.documents import Document
 
documents = [
    Document(
        page_content="Dogs are great companions, known for their loyalty and friendliness.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Cats are independent pets that often enjoy their own space.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Goldfish are popular pets for beginners, requiring relatively simple care.",
        metadata={"source": "fish-pets-doc"},
    ),
    Document(
        page_content="Parrots are intelligent birds capable of mimicking human speech.",
        metadata={"source": "bird-pets-doc"},
    ),
    Document(
        page_content="Rabbits are social animals that need plenty of space to hop around.",
        metadata={"source": "mammal-pets-doc"},
    ),
]

documents

[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.'),
 Document(metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.')]

In [3]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_groq import ChatGroq

groq_api_key = os.getenv("GROQ_API_KEY")

os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")

llm = ChatGroq(groq_api_key=groq_api_key, model= "llama-3.1-8b-instant")
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x0000018F571DB970>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x0000018F57215E40>, model_name='llama-3.1-8b-instant', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
from langchain_chroma import Chroma

vectorstore = Chroma.from_documents(documents, embedding=embeddings)
vectorstore


<langchain_chroma.vectorstores.Chroma at 0x18f7deb87f0>

In [6]:
vectorstore.similarity_search("cat")

[Document(id='173d44e8-83db-453b-baa3-0f0fc055bd0e', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(id='614ec293-4925-45d8-8502-96c763e70c04', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(id='165bd9c4-dd6d-468a-b481-8fc710024a32', metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
 Document(id='1949bf6a-cbd5-4450-b987-edbbd31afdb9', metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.')]

In [7]:
## Async query
await vectorstore.asimilarity_search("cat")

[Document(id='173d44e8-83db-453b-baa3-0f0fc055bd0e', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(id='614ec293-4925-45d8-8502-96c763e70c04', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(id='165bd9c4-dd6d-468a-b481-8fc710024a32', metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
 Document(id='1949bf6a-cbd5-4450-b987-edbbd31afdb9', metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.')]

In [8]:
vectorstore.similarity_search_with_score("cat")

[(Document(id='173d44e8-83db-453b-baa3-0f0fc055bd0e', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
  0.9351057410240173),
 (Document(id='614ec293-4925-45d8-8502-96c763e70c04', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
  1.5740900039672852),
 (Document(id='165bd9c4-dd6d-468a-b481-8fc710024a32', metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
  1.5956902503967285),
 (Document(id='1949bf6a-cbd5-4450-b987-edbbd31afdb9', metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
  1.6657923460006714)]

### Reteivers

In [9]:
from typing import List
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

retriver = RunnableLambda(vectorstore.similarity_search).bind(k=1)
retriver.batch(["cat", "dog"])


[[Document(id='173d44e8-83db-453b-baa3-0f0fc055bd0e', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')],
 [Document(id='614ec293-4925-45d8-8502-96c763e70c04', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.')]]

In [10]:
retriver = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k":1}
)

retriver.batch(["cat", "dog"])

[[Document(id='173d44e8-83db-453b-baa3-0f0fc055bd0e', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')],
 [Document(id='614ec293-4925-45d8-8502-96c763e70c04', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.')]]

In [11]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

message = """
Answer this question using the provided context only.

{question}

Context:
{context}
"""
prompt = ChatPromptTemplate.from_messages([("human", message)])

rag_chain = {"context": retriver, "question":RunnablePassthrough()} | prompt | llm

response = rag_chain.invoke("tell me about docs")

In [12]:
response.content

'Based on the provided context, "docs" likely refers to a collection or database of documents. In this specific case, it seems to be related to a document with the ID \'1949bf6a-cbd5-4450-b987-edbbd31afdb9, which contains information about parrots.'