# LangChain RAG with Contextual Semantic Chunking

This notebook demonstrates using LangChain with:
- **Semantic Chunking**: Natural boundary detection
- **Contextual Enhancement**: LLM-generated context for each chunk
- **PGVector**: PostgreSQL vector store with hybrid search

In [None]:
from src.storage.contextual_langchain_knowledge import ContextualLangChainKnowledge
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA
from src.config import settings

## Initialize Knowledge Base

In [None]:
# Knowledge-base handle used by every cell below; it is bound to the
# "economics_enhanced_langchain" collection.
# NOTE(review): presumably backed by the PGVector store named in the intro - confirm.
kb = ContextualLangChainKnowledge(
    collection_name="economics_enhanced_langchain",
)

## Ingest PDFs (Optional - Run Once)

In [None]:
# Uncomment to ingest PDFs
# kb.ingest_directory("data/pdfs")

## Simple Similarity Search

In [None]:
# Run a similarity search for the query; limit=3 presumably caps the number of hits.
query = "What are the main principles of economics?"
results = kb.search(query, limit=3)

# For every hit, show its provenance and a preview of the first 500 characters.
# The trailing "..." is printed even when the chunk is shorter than the preview.
for rank, hit in enumerate(results, start=1):
    meta = hit.metadata
    preview = hit.page_content[:500]
    print("\n" + "=" * 80)
    print(f"Result {rank}:")
    print(f"Source: {meta.get('source', 'Unknown')}")
    print(f"Page: {meta.get('page', 'Unknown')}")
    print(f"\nContent:\n{preview}...")

## Search with Relevance Scores

In [None]:
# Reuses `query` from the similarity-search cell above; each item returned here
# is unpacked below as a (document, score) pair.
results_with_scores = kb.search_with_score(query, limit=3)

# Print the score to four decimals, the source, and a 300-character preview.
for rank, (hit, relevance) in enumerate(results_with_scores, start=1):
    origin = hit.metadata.get('source', 'Unknown')
    snippet = hit.page_content[:300]
    print("\n" + "=" * 80)
    print(f"Result {rank} - Relevance Score: {relevance:.4f}")
    print(f"Source: {origin}")
    print(f"\nContent:\n{snippet}...")

## RAG with LangChain QA Chain

In [None]:
# Retriever over the knowledge base.
# NOTE(review): search_kwargs k=5 is presumably the number of chunks fetched
# per question - confirm against ContextualLangChainKnowledge.as_retriever.
retriever = kb.as_retriever(search_kwargs=dict(k=5))

# Chat model; the model name and API key come from project settings rather
# than being hardcoded in the notebook.
llm = ChatGoogleGenerativeAI(
    temperature=0,
    google_api_key=settings.google_api_key,
    model=settings.llm_model,
)

# Retrieval QA chain wiring the model to the retriever.
# return_source_documents=True makes the chain output carry the retrieved
# documents under "source_documents" (read by the cells below).
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)

## Ask Questions

In [None]:
# Send one question through the QA chain built in the previous cell.
question = "What are the main principles of economics?"
result = qa_chain.invoke({"query": question})

# Answer first, then a separator, then the list of supporting documents.
print(f"Question: {question}")
print("\nAnswer:")
print(result["result"])
print("\n" + "=" * 80)

sources = result["source_documents"]
print(f"\nSources ({len(sources)} documents):")
for idx, src in enumerate(sources, start=1):
    origin = src.metadata.get('source', 'Unknown')
    page = src.metadata.get('page', 'Unknown')
    print(f"\n{idx}. {origin} (Page {page})")

## Interactive Q&A

In [None]:
def ask(question: str) -> dict:
    """Ask a question and get an answer with sources.

    Sends ``question`` through the notebook-level ``qa_chain`` and prints the
    question, the generated answer, and how many source documents came back.

    Args:
        question: Natural-language question, passed to the chain under the
            ``"query"`` key.

    Returns:
        The raw chain output, so callers can inspect ``result["result"]`` and
        ``result["source_documents"]`` beyond what is printed here.
    """
    result = qa_chain.invoke({"query": question})
    print("\n" + "="*80)
    print(f"Q: {question}")
    print("="*80)
    print(f"\nA: {result['result']}")
    # \N{BOOKS} is the books emoji (U+1F4DA); the named escape keeps this line
    # ASCII-only so the character survives re-encoding of the notebook file.
    print(f"\n\N{BOOKS} Sources: {len(result['source_documents'])} documents")
    return result

# Try it out. `ask` prints the answer and the source count itself, so the
# trailing semicolon stops Jupyter from also echoing the returned result
# (which includes every source document) as the cell output.
ask("What is supply and demand?");