In [6]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")

from langchain_openai import AzureChatOpenAI
llm = AzureChatOpenAI(
    azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"),
    azure_deployment = os.getenv("AZURE_OPENAI_LLM_MODEL"),
    api_version = "2025-01-01-preview",
    api_key = os.getenv("AZURE_OPENAI_API_KEY"),
)

In [7]:
from langchain_core.documents import Document

document = [
    Document(page_content="Dogs are great companions, known for their loyalty and playful nature.", metadata={"source": "mammals-pet-doc"}),
    Document(page_content="Cats are independent animals that often enjoy solitude, but can also be affectionate.", metadata={"source": "mammals-pet-doc"}),
    Document(page_content="Parrots are colorful birds that can mimic human speech and are highly social creatures.", metadata={"source": "birds-pet-doc"}),
    Document(page_content="Goldfish are popular aquarium fish that come in various colors and are relatively easy to care for.", metadata={"source": "fish-pet-doc"}),
    Document(page_content="Hamsters are small rodents that are often kept as pets due to their cute appearance and low maintenance needs.", metadata={"source": "rodents-pet-doc"})
]

document

[Document(metadata={'source': 'mammals-pet-doc'}, page_content='Dogs are great companions, known for their loyalty and playful nature.'),
 Document(metadata={'source': 'mammals-pet-doc'}, page_content='Cats are independent animals that often enjoy solitude, but can also be affectionate.'),
 Document(metadata={'source': 'birds-pet-doc'}, page_content='Parrots are colorful birds that can mimic human speech and are highly social creatures.'),
 Document(metadata={'source': 'fish-pet-doc'}, page_content='Goldfish are popular aquarium fish that come in various colors and are relatively easy to care for.'),
 Document(metadata={'source': 'rodents-pet-doc'}, page_content='Hamsters are small rodents that are often kept as pets due to their cute appearance and low maintenance needs.')]

In [8]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

2026-01-12 14:53:25.191086: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-01-12 14:53:25.236876: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2026-01-12 14:53:26.220331: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
  if not hasattr(np, "object"):


In [9]:
##Vector Store

from langchain_chroma import Chroma

vector_store = Chroma.from_documents(
    documents=document,
    embedding=embeddings,
)
vector_store

<langchain_chroma.vectorstores.Chroma at 0x79b4abcaacd0>

In [10]:
vector_store.similarity_search("Tell me about parrots.", k=2)

[Document(id='b58044f6-6e6e-49d6-9edd-35eb4e668eb8', metadata={'source': 'birds-pet-doc'}, page_content='Parrots are colorful birds that can mimic human speech and are highly social creatures.'),
 Document(id='efa85eab-f667-42a0-bbae-5b263ef86583', metadata={'source': 'mammals-pet-doc'}, page_content='Dogs are great companions, known for their loyalty and playful nature.')]

In [11]:
## Async query

await vector_store.asimilarity_search("Tell me about parrots.", k=2)

[Document(id='b58044f6-6e6e-49d6-9edd-35eb4e668eb8', metadata={'source': 'birds-pet-doc'}, page_content='Parrots are colorful birds that can mimic human speech and are highly social creatures.'),
 Document(id='efa85eab-f667-42a0-bbae-5b263ef86583', metadata={'source': 'mammals-pet-doc'}, page_content='Dogs are great companions, known for their loyalty and playful nature.')]

In [13]:
await vector_store.asimilarity_search_with_score("Tell me about parrots.", k=2)
#Uses distance metric to return similarity score. Closer to 0 means more similar. C,loser to 1 means less similar.

[(Document(id='b58044f6-6e6e-49d6-9edd-35eb4e668eb8', metadata={'source': 'birds-pet-doc'}, page_content='Parrots are colorful birds that can mimic human speech and are highly social creatures.'),
  0.4635508060455322),
 (Document(id='efa85eab-f667-42a0-bbae-5b263ef86583', metadata={'source': 'mammals-pet-doc'}, page_content='Dogs are great companions, known for their loyalty and playful nature.'),
  1.4931869506835938)]

In [14]:
## Retrievers
"""
Vector stores objects do not subclass Runnable and so cannot be used directly in LangChain pipelines. Instead, we use retrievers which wrap vector stores and provide a standard interface for retrieving documents based on a query and are designed  to be incorporated in LCEL chains.
"""

'\nVector stores objects do not subclass Runnable and so cannot be used directly in LangChain pipelines. Instead, we use retrievers which wrap vector stores and provide a standard interface for retrieving documents based on a query and are designed  to be incorporated in LCEL chains.\n'

In [16]:
## Manual way to create retriever from vector store. But not recommended method.
from typing import List
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

retriever = RunnableLambda(vector_store.similarity_search).bind(k=2)
retriever.batch(["Tell me about parrots.", "Tell me about dogs."])

[[Document(id='b58044f6-6e6e-49d6-9edd-35eb4e668eb8', metadata={'source': 'birds-pet-doc'}, page_content='Parrots are colorful birds that can mimic human speech and are highly social creatures.'),
  Document(id='efa85eab-f667-42a0-bbae-5b263ef86583', metadata={'source': 'mammals-pet-doc'}, page_content='Dogs are great companions, known for their loyalty and playful nature.')],
 [Document(id='efa85eab-f667-42a0-bbae-5b263ef86583', metadata={'source': 'mammals-pet-doc'}, page_content='Dogs are great companions, known for their loyalty and playful nature.'),
  Document(id='12655fb4-cb34-4efb-b8e3-1328ece00328', metadata={'source': 'rodents-pet-doc'}, page_content='Hamsters are small rodents that are often kept as pets due to their cute appearance and low maintenance needs.')]]

In [17]:
## Convert vector store to retriever using built-in method
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k":2}
)
retriever.batch(["Tell me about parrots.", "Tell me about dogs."])

[[Document(id='b58044f6-6e6e-49d6-9edd-35eb4e668eb8', metadata={'source': 'birds-pet-doc'}, page_content='Parrots are colorful birds that can mimic human speech and are highly social creatures.'),
  Document(id='efa85eab-f667-42a0-bbae-5b263ef86583', metadata={'source': 'mammals-pet-doc'}, page_content='Dogs are great companions, known for their loyalty and playful nature.')],
 [Document(id='efa85eab-f667-42a0-bbae-5b263ef86583', metadata={'source': 'mammals-pet-doc'}, page_content='Dogs are great companions, known for their loyalty and playful nature.'),
  Document(id='12655fb4-cb34-4efb-b8e3-1328ece00328', metadata={'source': 'rodents-pet-doc'}, page_content='Hamsters are small rodents that are often kept as pets due to their cute appearance and low maintenance needs.')]]

In [25]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

message = """
Answer the question based on the context only.

Question: {question}

Context: {context}
"""

prompt = ChatPromptTemplate.from_template(message)

rag_chain = {"context": retriever,"question": RunnablePassthrough()} \
    | prompt \
    | llm \
    | StrOutputParser()

response = rag_chain.invoke("Tell me about parrots.")
response

'Parrots are colorful birds that can mimic human speech and are highly social creatures.'

In [None]:
|