
‚úÖ 4. ParentDocumentRetriever

Use Case: Retrieve parent chunks based on a retrieval of smaller child chunks, great for keeping context intact in long documents.

In [1]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS, Chroma
from langchain.retrievers import ParentDocumentRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.storage import InMemoryStore
from langchain.schema.document import Document
from dotenv import load_dotenv
import os

# 1. Load environment
load_dotenv()
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
# 1. Prepare documents
docs = [Document(page_content="LangChain helps build LLM-powered apps with memory and agents.", metadata={"id": "1"}),
        Document(page_content="Agents in LangChain use tools to answer questions.", metadata={"id": "2"})]

# 2. Setup child splitter
child_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)

# 3. Setup vectorstore for children
embedding = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(docs, embedding)

# 4. Parent retriever
retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=InMemoryStore(),  # Stores parent docs
    child_splitter=child_splitter
)

# 5. Add documents
retriever.add_documents(docs)

# 6. Retrieve
results = retriever.get_relevant_documents("What are agents?")
for doc in results:
    print("üìÑ Retrieved Doc:", doc.page_content)


  embedding = OpenAIEmbeddings()
  results = retriever.get_relevant_documents("What are agents?")


üìÑ Retrieved Doc: Agents in LangChain use tools to answer questions.
üìÑ Retrieved Doc: LangChain helps build LLM-powered apps with memory and agents.



‚úÖ 5. BM25Retriever

Use Case: Pure keyword-based search (like traditional search engines). No embeddings needed.

In [4]:
# !pip install rank_bm25

In [5]:
from langchain.retrievers import BM25Retriever
from langchain.schema.document import Document

# Create simple text docs
docs = [
    Document(page_content="LangChain enables LLM applications."),
    Document(page_content="Vector search is powerful."),
    Document(page_content="BM25 is a classical retrieval method.")
]

# Create BM25 retriever
bm25_retriever = BM25Retriever.from_documents(docs)

# Retrieve
results = bm25_retriever.get_relevant_documents("How does BM25 work?")
for doc in results:
    print("üìù BM25 Result:", doc.page_content)

üìù BM25 Result: BM25 is a classical retrieval method.
üìù BM25 Result: Vector search is powerful.
üìù BM25 Result: LangChain enables LLM applications.


‚úÖ 6. EnsembleRetriever

Use Case: Combine multiple retrievers (e.g., keyword + vector-based) with weighted scores.

In [7]:
from langchain.retrievers import EnsembleRetriever
from langchain.retrievers import BM25Retriever
from langchain.vectorstores import FAISS,Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema.document import Document

# Sample docs
docs = [Document(page_content="LangChain supports LLMs."), Document(page_content="You can build AI apps using LangChain.")]

# BM25 Retriever
bm25 = BM25Retriever.from_documents(docs)

# Vector Retriever
embedding = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(docs, embedding)
vector_retriever = vectorstore.as_retriever()

# Ensemble Retriever (equal weight)
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25, vector_retriever],
    weights=[0.5, 0.5]
)

# Query
results = ensemble_retriever.get_relevant_documents("AI apps using LangChain")
for doc in results:
    print("üîç Ensemble Doc:", doc.page_content)


üîç Ensemble Doc: You can build AI apps using LangChain.
üîç Ensemble Doc: LangChain helps build LLM-powered apps with memory and agents.
üîç Ensemble Doc: LangChain supports LLMs.


7.TimeWeightedVectorStoreRetriever

This retriever boosts document relevance by factoring recency and importance of interactions (used in agents with memory or relevance ranking).

In [24]:
#7.TimeWeightedVectorStoreRetriever
#This retriever boosts document relevance by factoring recency and importance of interactions (used in agents with memory or relevance ranking).

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS, Chroma
from langchain.retrievers import TimeWeightedVectorStoreRetriever
from langchain.schema import Document
from datetime import datetime
import os

# Initialize embedding & vectorstore
embedding = OpenAIEmbeddings()
docs = [
    Document(page_content="LangChain is for LLM-based apps", metadata={"last_accessed_at": None}),
    Document(page_content="Vector search improves relevance", metadata={"last_accessed_at": None})
]
vectorstore = FAISS.from_documents(docs, embedding)

# TimeWeighted Retriever
retriever = TimeWeightedVectorStoreRetriever(
    vectorstore=vectorstore,
    decay_rate=0.01,
    k=2,
    score_threshold=None
)

# Retrieve
results = retriever.get_relevant_documents("What is LangChain?")
for r in results:
    print(r)
    print(r.page_content)


In [23]:
# from langchain_openai import OpenAIEmbeddings
# from langchain_community.vectorstores import FAISS
# from langchain.retrievers.time_weighted_vectorstore import TimeWeightedVectorStoreRetriever
# from langchain.schema import Document
# from datetime import datetime, timedelta

# embedding = OpenAIEmbeddings()

# # Docs with ISO timestamp strings
# docs = [
#     Document(
#         page_content="LangChain is for LLM-based apps",
#         metadata={"last_accessed_at": (datetime.now() - timedelta(hours=1)).isoformat()}
#     ),
#     Document(
#         page_content="Vector search improves relevance",
#         metadata={"last_accessed_at": datetime.now().isoformat()}
#     )
# ]

# # Build FAISS vectorstore
# vectorstore = FAISS.from_documents(docs, embedding)

# # TimeWeighted retriever
# retriever = TimeWeightedVectorStoreRetriever(
#     vectorstore=vectorstore,
#     decay_rate=0.01,   # how fast old docs decay
#     k=2
# )

# # Test query
# results = retriever.get_relevant_documents("What is LangChain?")

# for r in results:
#     print("CONTENT:", r.page_content)
#     print("METADATA:", r.metadata)
#     print()


In [19]:
# !pip install openai faiss-cpu


In [21]:
# !pip install tavily-python

In [22]:
from langchain.retrievers import TavilySearchAPIRetriever
import os

# Set your Tavily API key
#os.environ["TAVILY_API_KEY"] = "your_tavily_api_key"

from dotenv import load_dotenv
import os
# üîê Load API keys
load_dotenv(".env")
openai_api_key = os.getenv("OPENAI_API_KEY")
tavily_api_key = os.getenv("TAVILY_API_KEY")

# Initialize Tavily Retriever
retriever = TavilySearchAPIRetriever(k=3)

# Perform search
docs = retriever.get_relevant_documents("Latest updates about LangChain")
for doc in docs:
    print(doc.page_content)


LangChain 1.1. LangChain v1.1.0 is now available This release makes agent development more reliable, more structured, and more context-aware ‚Äî powered by a new.
The langchain namespace has been streamlined to focus on essential building blocks for agents, with legacy functionality moved to langchain-classic . To upgrade
LangChain v1.0 is now available!For a complete list of changes and instructions on how to upgrade your code, see the release notes and migration guide.
