In [77]:
! pip install -qU -r requirements.txt

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [91]:
from decouple import AutoConfig
from icecream import ic
from langchain_core.documents import Document
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from typing import List
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate

In [79]:
config = AutoConfig(search_path='../notes-rag/')

HF_TOKEN = config("HF_TOKEN")
GROQ_API_KEY = config("GROQ_API_KEY")
LANGCHAIN_TRACING_V2 = config("LANGCHAIN_TRACING_V2")
LANGCHAIN_ENDPOINT = config("LANGCHAIN_ENDPOINT")
LANGCHAIN_PROJECT = config("LANGCHAIN_PROJECT")
lANGCHAIN_API_KEY = config("LANGCHAIN_API_KEY")

In [80]:
llm = ChatGroq(api_key=GROQ_API_KEY, model="llama3-8b-8192")
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [81]:
documents= [
    Document(
        page_content="dogs are great companions, known for their loyalty and friendliness.",
        metadata={"source": "mammal-pets-doc"}
    ),
    Document(
        page_content="cats are independent pets that often enjoy their own space.",
        metadata={"source": "mammal-pets-doc"}
    ),
    Document(
        page_content="goldfish are popular pets for beginners, requiring relatively simple care.",
        metadata={"source": "fish-pets-doc"}
    ),
    Document(
        page_content="parrots are intelligent birds capable of mimicking human speech",
        metadata={"source": "birds-pets-doc"}
    ),
    Document(
        page_content="rabbits are social animals that need plenty of space to hop around",
        metadata={"source": "mammal-pets-doc"}
    )
]

In [82]:
vector_store = Chroma.from_documents(documents, embedding=embeddings)

In [83]:
vector_store.similarity_search("hop around")

[Document(metadata={'source': 'mammal-pets-doc'}, page_content='rabbits are social animals that need plenty of space to hop around'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='rabbits are social animals that need plenty of space to hop around'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='dogs are great companions, known for their loyalty and friendliness.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='dogs are great companions, known for their loyalty and friendliness.')]

In [84]:
await vector_store.asimilarity_search("hop around")

[Document(metadata={'source': 'mammal-pets-doc'}, page_content='rabbits are social animals that need plenty of space to hop around'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='rabbits are social animals that need plenty of space to hop around'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='dogs are great companions, known for their loyalty and friendliness.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='dogs are great companions, known for their loyalty and friendliness.')]

In [85]:
vector_store.similarity_search_with_score("hop around")

[(Document(metadata={'source': 'mammal-pets-doc'}, page_content='rabbits are social animals that need plenty of space to hop around'),
  1.2058610916137695),
 (Document(metadata={'source': 'mammal-pets-doc'}, page_content='rabbits are social animals that need plenty of space to hop around'),
  1.2058610916137695),
 (Document(metadata={'source': 'mammal-pets-doc'}, page_content='dogs are great companions, known for their loyalty and friendliness.'),
  1.801571011543274),
 (Document(metadata={'source': 'mammal-pets-doc'}, page_content='dogs are great companions, known for their loyalty and friendliness.'),
  1.801571011543274)]

In [86]:
retriever = RunnableLambda(vector_store.similarity_search).bind(k=1) # not recommended approach

In [87]:
retriever.invoke("cat")
retriever.batch(["cat", "dog"])

[[Document(metadata={'source': 'mammal-pets-doc'}, page_content='cats are independent pets that often enjoy their own space.')],
 [Document(metadata={'source': 'mammal-pets-doc'}, page_content='dogs are great companions, known for their loyalty and friendliness.')]]

In [88]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k":1}) # recommended approach

In [89]:
retriever.invoke("cat")
retriever.batch(["cat", "dog"])

[[Document(metadata={'source': 'mammal-pets-doc'}, page_content='cats are independent pets that often enjoy their own space.')],
 [Document(metadata={'source': 'mammal-pets-doc'}, page_content='dogs are great companions, known for their loyalty and friendliness.')]]

In [93]:
message = """
    Answer this question using the provided context only.
    {question}
    context:
    {context}
"""

In [95]:
prompt = ChatPromptTemplate.from_messages([("human", message)])

In [99]:
rag_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm

# RunnablePassthrough helps to get question value from invoke method

In [98]:
response = rag_chain.invoke("tell me about dogs")
print(response)

content='According to the provided context, dogs are great companions, known for their loyalty and friendliness.' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 20, 'prompt_tokens': 61, 'total_tokens': 81, 'completion_time': 0.016666667, 'prompt_time': 0.002601166, 'queue_time': 0.012610792, 'total_time': 0.019267833}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_179b0f92c9', 'finish_reason': 'stop', 'logprobs': None} id='run-2bdafcb0-627d-47ca-b70e-9552d8dfef5c-0' usage_metadata={'input_tokens': 61, 'output_tokens': 20, 'total_tokens': 81}
