In [2]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from langchain.schema import Document

from dotenv import load_dotenv

In [7]:
# Sample corpus: every sentence below is wrapped in its own Document.
docs = [
    Document(page_content=text)
    for text in (
        "Langchain helps to build LLM applications",
        "Langchain is a framework for building language models",
        "Pinecone is a vector database that stores embeddings of text documents",
        "The Eiffel tower is located in Paris, France",
        "Langchain is used to build applications that interact with LLMs",
    )
]

# Dense retriever: embed the corpus with a HuggingFace model and index the
# vectors in FAISS.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
dense_vectorestore = FAISS.from_documents(documents=docs, embedding=embedding_model)
# No search_kwargs are passed, so the retriever uses the library's default
# search settings.
dense_retriever = dense_vectorestore.as_retriever()

In [8]:
# Sparse (keyword) retriever: BM25 built over the same documents, configured
# at construction time to return its top 3 matches per query.
spares_retriever = BM25Retriever.from_documents(docs, k=3)

# Hybrid retriever: an ensemble of the dense and the sparse retriever.
# The weights follow the order of `retrievers`: 0.7 for dense, 0.3 for sparse.
hybrid_retriever = EnsembleRetriever(
    retrievers=[dense_retriever, spares_retriever],
    weights=[0.70, 0.3],
)


In [9]:
# Send one natural-language question through the hybrid retriever.
query = "How can i build an application using LLMs?"
result = hybrid_retriever.invoke(query)

# Print each retrieved document's text, numbered from 1.
for rank, doc in enumerate(result, start=1):
    print(f"\n Document {rank}:\n{doc.page_content}")


 Document 1:
Langchain is used to build applications that interact with LLMs

 Document 2:
Langchain helps to build LLM applications

 Document 3:
Langchain is a framework for building language models

 Document 4:
Pinecone is a vector database that stores embeddings of text documents

 Document 5:
The Eiffel tower is located in Paris, France


### RAG Pipeline with hybrid retriever

In [10]:
from langchain_ollama import ChatOllama
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

In [22]:
# Prompt template with two placeholders: {context} (the retrieved documents)
# and {input} (the user's question). The leading and trailing newlines are
# part of the template text.
prompt = PromptTemplate.from_template(
    "\n"
    "Answer the question based on the context below.\n"
    "\n"
    "Context:\n"
    "{context}\n"
    "\n"
    "Question: {input}\n"
)

# Chat model served through Ollama; num_ctx is Ollama's context-window size
# setting.
llm = ChatOllama(model="gemma3:4b-it-q4_K_M", num_ctx=32768)

In [23]:
# Document chain: formats the retrieved documents into the prompt's {context}
# slot and passes the filled prompt to the LLM.
document_chain = create_stuff_documents_chain(llm, prompt)

# Full RAG chain: the hybrid retriever is queried with the "input" value, its
# documents are stored under "context", and the document chain's output is
# stored under "answer".
rag_chain = create_retrieval_chain(hybrid_retriever, document_chain)

# Display the composed runnable so the pipeline structure can be inspected.
rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x714189224590>, search_kwargs={}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x714128f73e30>, k=3)], weights=[0.7, 0.3]), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\nAnswer the question based on the context below.\n\nContext:\n{context}\n\nQuestion: {input}\n')
            | ChatOllama(model='gemma3:4

In [24]:
# Ask the RAG chain a question; the chain reads it from the "input" key.
query = dict(input='How can I build an app using LLMs?')
response = rag_chain.invoke(query)

# Show the generated answer first, then the retrieved documents that were
# supplied to the model as context, numbered from 1.
print("Answer:\n", response["answer"])
print("\n Source Documents:")
for rank, doc in enumerate(response["context"], start=1):
    print(f"\nDoc {rank}: {doc.page_content}")


Answer:
 According to the context, you can build an app using LLMs with Langchain. Langchain is a framework for building language models and is used to build applications that interact with LLMs.

 Source Documents:

Doc 1: Langchain helps to build LLM applications

Doc 2: Langchain is used to build applications that interact with LLMs

Doc 3: Langchain is a framework for building language models

Doc 4: Pinecone is a vector database that stores embeddings of text documents

Doc 5: The Eiffel tower is located in Paris, France
