In [3]:
from langchain_core.documents import Document

documents = [
    Document(
        page_content="This is the first document.",
        metadata={"source": "doc1", "author": "Alice", "date": "2023-01-01"}
    ),
    Document(
        page_content="This is the second document.",
        metadata={"source": "doc2", "author": "Bob", "date": "2023-02-01"}
    ),
    Document(
        page_content="This is the third document.",
        metadata={"source": "doc3", "author": "Charlie", "date": "2023-03-01"}
    ),
    Document(
        page_content="This is the fourth document.",
        metadata={"source": "doc4", "author": "Alice", "date": "2023-04-01"}
    ),
    Document(
        page_content="This is the fifth document.",
        metadata={"source": "doc5", "author": "Bob", "date": "2023-05-01"}
    ),
]

In [4]:
documents

[Document(metadata={'source': 'doc1', 'author': 'Alice', 'date': '2023-01-01'}, page_content='This is the first document.'),
 Document(metadata={'source': 'doc2', 'author': 'Bob', 'date': '2023-02-01'}, page_content='This is the second document.'),
 Document(metadata={'source': 'doc3', 'author': 'Charlie', 'date': '2023-03-01'}, page_content='This is the third document.'),
 Document(metadata={'source': 'doc4', 'author': 'Alice', 'date': '2023-04-01'}, page_content='This is the fourth document.'),
 Document(metadata={'source': 'doc5', 'author': 'Bob', 'date': '2023-05-01'}, page_content='This is the fifth document.')]

In [5]:
from langchain_groq import ChatGroq
import os 
from dotenv import load_dotenv
load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")

os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN",)

llm = ChatGroq(
    model="llama3-8b-8192",
    groq_api_key=groq_api_key)
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x11665acb0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x116694ee0>, model_name='llama3-8b-8192', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [6]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
#Vector Stores
from langchain_chroma import Chroma
vectorstore = Chroma.from_documents(
    documents=documents,
    embedding=embeddings
)
vectorstore

<langchain_chroma.vectorstores.Chroma at 0x141f67850>

In [8]:
vectorstore.similarity_search(
    query="What is the content of the first document?",
    k=1
)

[Document(id='0ff9f5b8-21a6-492a-a8a9-a30e1a88f2db', metadata={'author': 'Alice', 'date': '2023-01-01', 'source': 'doc1'}, page_content='This is the first document.')]

In [None]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

retriever = RunnableLambda(vectorstore.similarity_search).bind(k=1)
retriever.batch(["first","fourth"])

[[Document(id='0ff9f5b8-21a6-492a-a8a9-a30e1a88f2db', metadata={'source': 'doc1', 'author': 'Alice', 'date': '2023-01-01'}, page_content='This is the first document.')],
 [Document(id='9cf57eca-9ac0-48e2-8830-8ef1473b9e7c', metadata={'author': 'Alice', 'source': 'doc4', 'date': '2023-04-01'}, page_content='This is the fourth document.')]]

In [15]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

message= """ 
Answer this question using the provided context only. {question}
Context:{context}"""

prompt = ChatPromptTemplate.from_messages([("human",message)])

rag_chain = {"context": retriever, "question": RunnablePassthrough()}|prompt|llm
response = rag_chain.invoke("What is the content of the first document?")
print(response)


content='The content of the first document is: "This is the first document."' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 97, 'total_tokens': 113, 'completion_time': 0.018742008, 'prompt_time': 0.029302185, 'queue_time': 0.28932972500000004, 'total_time': 0.048044193}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_8b7c3a83f7', 'finish_reason': 'stop', 'logprobs': None} id='run--e8421811-2d18-40d8-a3d2-f5ed0c1b5e51-0' usage_metadata={'input_tokens': 97, 'output_tokens': 16, 'total_tokens': 113}
