In [1]:
from langchain_community.vectorstores import FAISS
from langchain_community.retrievers import BM25Retriever
from langchain_classic.retrievers import EnsembleRetriever
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.documents import Document


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Step 1 - Sample Documnets
docs = [
    Document(page_content="LangChain is a framework for building applications with LLMs."),
    Document(page_content="Pinecone is a vector database for sematic search"),
    Document(page_content="The Eiffel Tower is located in Paris."),
    Document(page_content="Langchain can be used to develop agentic ai application."),
    Document(page_content="langchain has many types of retrievers")
]

# step 2 - Dense Retrieve (FAISS + HuggingFace)

embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
dense_vectorstore = FAISS.from_documents(docs, embedding_model)
dense_retriever = dense_vectorstore.as_retriever()

In [3]:
## Sparse Retriever
sparse_retriever = BM25Retriever.from_documents(docs)
sparse_retriever.k = 3


# step 4 - Combine with Ensamble Retriever
hybrid_retriever = EnsembleRetriever(
    retrievers=[dense_retriever, sparse_retriever],
    weight=[0.7,0.3]
)

In [4]:
hybrid_retriever

EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x17ad10590>, search_kwargs={}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x17ad10c20>, k=3)], weights=[0.5, 0.5])

In [5]:
# step 5 - Query and get result
query = "How can i build an application using LLMs?"

results = hybrid_retriever.invoke(query)
print(results)

for i, doc in enumerate(results):
    print(f"\nDoument {i+1}: {doc.page_content}")


[Document(id='7bea1eba-2e91-48f7-9b50-c7fee9af141d', metadata={}, page_content='Langchain can be used to develop agentic ai application.'), Document(id='9ac9455a-1210-473b-a58f-b93456a16e58', metadata={}, page_content='langchain has many types of retrievers'), Document(id='530d331e-c948-4a47-b2c4-6038e2ad4f1a', metadata={}, page_content='LangChain is a framework for building applications with LLMs.'), Document(id='064f7d73-d891-4a96-8ad2-5ebf94d2969a', metadata={}, page_content='Pinecone is a vector database for sematic search'), Document(metadata={}, page_content='The Eiffel Tower is located in Paris.')]

Doument 1: Langchain can be used to develop agentic ai application.

Doument 2: langchain has many types of retrievers

Doument 3: LangChain is a framework for building applications with LLMs.

Doument 4: Pinecone is a vector database for sematic search

Doument 5: The Eiffel Tower is located in Paris.


# RAG PIPELINE WITH HYBRID

In [6]:
# Step 6 - prompt template
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

template = """Answere the question based on the following context"""
prompt = ChatPromptTemplate.from_messages([
    ("system", template),
    ("human", "Context:\n{context}\n\nQuestion:\n{question}")
])
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='Answere the question based on the following context'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Context:\n{context}\n\nQuestion:\n{question}'), additional_kwargs={})])

In [12]:
from langchain_classic.retrievers import create_retrieval_chain
from langchain_openai import ChatOpenAI


# step 7 - Initialize LLM
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.2)

# Step 8-  Create RAG chain using your hybrid retriever
rag_chain = create_retrieval_chain(
    llm=llm,
    retriever=hybrid_retriever,
    return_source_documents=True
)
rag_chain

ImportError: cannot import name 'create_retrieval_chain' from 'langchain_classic.retrievers' (/Users/sumanyadav/Desktop/Learn/Learn-RAG/.venv/lib/python3.13/site-packages/langchain_classic/retrievers/__init__.py)

In [8]:
query = "How can i build an application using LLMs?"

results = rag_chain.invoke(query)
print(results)

for i, doc in enumerate(results):
    print(f"\nDoument {i+1}: {doc.page_content}")


You can build an application using LLMs by utilizing LangChain, which is a framework specifically designed for building applications with Large Language Models (LLMs).


AttributeError: 'str' object has no attribute 'page_content'