In [1]:
from langchain_core.documents import Document

document =[
  Document(
    page_content="Langchain is a framework for building language models",
    metadata={"source":"langchain.pdf", "page":1}
  ),
  Document(
    page_content="Langchain is a framework for building language models",
    metadata={"source":"langchain.pdf", "page":2}
  ),
  Document(
    page_content="Langchain is a framework for building language models",
    metadata={"source":"langchain.pdf", "page":3}
  ),
  Document(
    page_content="Langchain is a framework for building language models",
    metadata={"source":"langchain.pdf", "page":4}
  ),
]

In [2]:
document

[Document(metadata={'source': 'langchain.pdf', 'page': 1}, page_content='Langchain is a framework for building language models'),
 Document(metadata={'source': 'langchain.pdf', 'page': 2}, page_content='Langchain is a framework for building language models'),
 Document(metadata={'source': 'langchain.pdf', 'page': 3}, page_content='Langchain is a framework for building language models'),
 Document(metadata={'source': 'langchain.pdf', 'page': 4}, page_content='Langchain is a framework for building language models')]

In [17]:
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq

load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")
os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")

llm = ChatGroq(model="llama-3.3-70b-versatile")
llm

ChatGroq(profile={'max_input_tokens': 131072, 'max_output_tokens': 32768, 'image_inputs': False, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': False, 'tool_calling': True}, client=<groq.resources.chat.completions.Completions object at 0x000001F5C5CAADA0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001F5C5B8B1C0>, model_name='llama-3.3-70b-versatile', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [5]:
from langchain_chroma import Chroma

In [6]:

vectorstore = Chroma.from_documents(document, embedding = embeddings)

vectorstore

<langchain_chroma.vectorstores.Chroma at 0x1f5391206d0>

In [7]:
vectorstore.similarity_search("lang")

[Document(id='37b6442b-3386-4220-af68-98d1963cfd24', metadata={'page': 1, 'source': 'langchain.pdf'}, page_content='Langchain is a framework for building language models'),
 Document(id='d7c300a5-c5d2-41e7-9a2b-7a98b97ab74c', metadata={'page': 2, 'source': 'langchain.pdf'}, page_content='Langchain is a framework for building language models'),
 Document(id='7bf11e17-e02b-4803-8624-6a9e3cfcc5b0', metadata={'page': 3, 'source': 'langchain.pdf'}, page_content='Langchain is a framework for building language models'),
 Document(id='bdb6cfea-a3dc-4d3b-8e27-16bb0b0809a0', metadata={'page': 4, 'source': 'langchain.pdf'}, page_content='Langchain is a framework for building language models')]

In [8]:
await vectorstore.asimilarity_search_with_score("Framework")

[(Document(id='37b6442b-3386-4220-af68-98d1963cfd24', metadata={'source': 'langchain.pdf', 'page': 1}, page_content='Langchain is a framework for building language models'),
  1.2519457340240479),
 (Document(id='d7c300a5-c5d2-41e7-9a2b-7a98b97ab74c', metadata={'page': 2, 'source': 'langchain.pdf'}, page_content='Langchain is a framework for building language models'),
  1.2519457340240479),
 (Document(id='7bf11e17-e02b-4803-8624-6a9e3cfcc5b0', metadata={'page': 3, 'source': 'langchain.pdf'}, page_content='Langchain is a framework for building language models'),
  1.2519457340240479),
 (Document(id='bdb6cfea-a3dc-4d3b-8e27-16bb0b0809a0', metadata={'page': 4, 'source': 'langchain.pdf'}, page_content='Langchain is a framework for building language models'),
  1.2519457340240479)]

In [9]:
from typing import List
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

retriever = RunnableLambda(vectorstore.similarity_search).bind(k=1)

retriever.batch(["Lang","models"])

[[Document(id='37b6442b-3386-4220-af68-98d1963cfd24', metadata={'source': 'langchain.pdf', 'page': 1}, page_content='Langchain is a framework for building language models')],
 [Document(id='37b6442b-3386-4220-af68-98d1963cfd24', metadata={'page': 1, 'source': 'langchain.pdf'}, page_content='Langchain is a framework for building language models')]]

In [10]:
retriever =  vectorstore.as_retriever(
  search_type="similarity",
  search_kwargs={"k": 1}
)
retriever.batch(["langchain","framework"])

[[Document(id='37b6442b-3386-4220-af68-98d1963cfd24', metadata={'source': 'langchain.pdf', 'page': 1}, page_content='Langchain is a framework for building language models')],
 [Document(id='37b6442b-3386-4220-af68-98d1963cfd24', metadata={'source': 'langchain.pdf', 'page': 1}, page_content='Langchain is a framework for building language models')]]

In [18]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

message = """
  Answer the question using the provided context.
  {question}
  Context:
   {context}
"""

prompt =  ChatPromptTemplate.from_messages([("human", message)])
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()} 
    | prompt 
    | llm
)

response = rag_chain.invoke("tell me about langchain")
print(response.content)

Langchain is a framework for building language models. This information is sourced from a document titled "langchain.pdf" on page 1.
