In [None]:
!pip install langchain_chroma

In [None]:
!pip install langchain_huggingface

In [1]:
from langchain_core.documents import Document

documents = [
    Document(
        page_content="The quick brown fox jumps over the lazy dog",
        metadata={"source": "a"},
    ),  
    Document(
        page_content="The yellow cat plays with the ball",
        metadata={"source": "b"},
    ),
    Document(
        page_content="The blue bird sings a song",
        metadata={"source": "c"},
    ),
    Document(
        page_content="The green turtle swims in the pond",
        metadata={"source": "d"},
    )
]

In [2]:
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq
load_dotenv()

groq_api_key = os.getenv("GROQ_API_KEY")

os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")

llm = ChatGroq(model="meta-llama/llama-4-scout-17b-16e-instruct", groq_api_key=groq_api_key)

In [7]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2"
)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [9]:
from langchain_chroma import Chroma

vectorstore = Chroma.from_documents(
    documents,
    embedding=embeddings
)



In [14]:
vectorstore.similarity_search("hop")

[Document(id='efe39ae7-feff-4b85-8739-0e2e9dc03d33', metadata={'source': 'a'}, page_content='The quick brown fox jumps over the lazy dog'),
 Document(id='bebc4537-04b4-4a79-bc9d-654fe8e6b61e', metadata={'source': 'c'}, page_content='The blue bird sings a song'),
 Document(id='44613abc-0bcd-4eda-afbd-a3486d8a18c2', metadata={'source': 'b'}, page_content='The yellow cat plays with the ball'),
 Document(id='1b142843-78ba-479d-8034-1e6b63161b01', metadata={'source': 'd'}, page_content='The green turtle swims in the pond')]

In [16]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

retriever = RunnableLambda(
    lambda x: vectorstore.similarity_search(x, k=1)
)

retriever.batch(["fox", "bird"])

[[Document(id='efe39ae7-feff-4b85-8739-0e2e9dc03d33', metadata={'source': 'a'}, page_content='The quick brown fox jumps over the lazy dog')],
 [Document(id='bebc4537-04b4-4a79-bc9d-654fe8e6b61e', metadata={'source': 'c'}, page_content='The blue bird sings a song')]]

In [18]:
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 1},
    search_type="similarity",
)

retriever.batch(["fox", "bird"])

[[Document(id='efe39ae7-feff-4b85-8739-0e2e9dc03d33', metadata={'source': 'a'}, page_content='The quick brown fox jumps over the lazy dog')],
 [Document(id='bebc4537-04b4-4a79-bc9d-654fe8e6b61e', metadata={'source': 'c'}, page_content='The blue bird sings a song')]]

In [None]:
## RAG
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

message = """
Answer this question with the provided context only.

{question}

Context:

{context}
"""

prompt = ChatPromptTemplate.from_messages(
    [
        ("human", message),
    ]
)

rag_chain = {"context": retriever, "question": RunnablePassthrough()}|prompt|llm

response = rag_chain.invoke("What does the turtle do and where?")
print(response.content)

The turtle swims in the pond.
