In [1]:
%pip install langgraph

Collecting langgraph
  Downloading langgraph-0.6.7-py3-none-any.whl.metadata (6.8 kB)
Collecting langgraph-checkpoint<3.0.0,>=2.1.0 (from langgraph)
  Using cached langgraph_checkpoint-2.1.1-py3-none-any.whl.metadata (4.2 kB)
Collecting langgraph-prebuilt<0.7.0,>=0.6.0 (from langgraph)
  Downloading langgraph_prebuilt-0.6.4-py3-none-any.whl.metadata (4.5 kB)
Collecting langgraph-sdk<0.3.0,>=0.2.2 (from langgraph)
  Downloading langgraph_sdk-0.2.6-py3-none-any.whl.metadata (1.5 kB)
Collecting xxhash>=3.5.0 (from langgraph)
  Using cached xxhash-3.5.0-cp313-cp313-win_amd64.whl.metadata (13 kB)
Collecting ormsgpack>=1.10.0 (from langgraph-checkpoint<3.0.0,>=2.1.0->langgraph)
  Using cached ormsgpack-1.10.0-cp313-cp313-win_amd64.whl.metadata (44 kB)
Downloading langgraph-0.6.7-py3-none-any.whl (153 kB)
Using cached langgraph_checkpoint-2.1.1-py3-none-any.whl (43 kB)
Downloading langgraph_prebuilt-0.6.4-py3-none-any.whl (28 kB)
Downloading langgraph_sdk-0.2.6-py3-none-any.whl (54 kB)
Using 

In [3]:
# LangGraph Migration for Bangladesh Legal RAG
# This notebook shows how to migrate from LangChain retrieval chains to LangGraph

# ==========================================
# Cell 1: Install and Import Required Packages
# ==========================================

# !pip install langgraph langchain-groq langchain-openai langchain-pinecone pinecone-client python-dotenv

import os
from dotenv import load_dotenv
from typing import TypedDict, List, Annotated
from langchain_openai import OpenAIEmbeddings
# from langchain_pinecone import PineconeVectorStore
# from pinecone import Pinecone

from pinecone import Pinecone, ServerlessSpec  # Main Pinecone client
from langchain_pinecone import PineconeVectorStore  # LangChain wrapper

from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langgraph.graph import StateGraph, END, add_messages
from langchain_core.messages import HumanMessage, AIMessage

load_dotenv()

True

In [4]:
# ==========================================
# Cell 2: Define State for LangGraph
# ==========================================

class LegalRAGState(TypedDict):
    """
    State definition for our Legal RAG workflow
    This replaces the simple input/output of the original chain
    """
    # Input
    question: str
    
    # Intermediate states
    retrieved_docs: List[Document]
    context: str
    
    # Output
    answer: str
    
    # Metadata for tracking
    metadata: dict
    
    # Optional: conversation history for multi-turn
    messages: Annotated[list, add_messages]

print("✅ State definition complete")

✅ State definition complete


In [5]:
# ==========================================
# Cell 3: Setup Components (Same as Original)
# ==========================================

def setup_components():
    """Setup embeddings, vectorstore, retriever, and LLM - same as original"""
    
    # OpenAI embeddings
    embeddings = OpenAIEmbeddings(
        model="text-embedding-3-large",
        api_key=os.getenv("OPENAI_API_KEY")
    )
    
    # Pinecone setup
    os.environ["PINECONE_API_KEY"] = os.getenv("PINECONE_API_KEY")
    pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
    
    # Vectorstore
    vectorstore = PineconeVectorStore(
        index=pc.Index("act"),  # Your existing index
        embedding=embeddings
    )
    
    # Retriever - same configuration as original
    retriever = vectorstore.as_retriever(
        search_type="similarity", 
        search_kwargs={'k': 5}
    )
    
    # LLM - same as original
    llm = ChatGroq(
        groq_api_key=os.getenv("GROQ_API_KEY"),
        model_name="meta-llama/llama-4-scout-17b-16e-instruct",
        temperature=0.1,
        max_tokens=None
    )
    
    return retriever, llm, embeddings

retriever, llm, embeddings = setup_components()
print("✅ Components setup complete")
print(f"📊 Embedding dimension: {len(embeddings.embed_query('test'))}")

✅ Components setup complete
📊 Embedding dimension: 3072


In [6]:
# ==========================================
# Cell 4: System Prompt (Same as Original)
# ==========================================

SYSTEM_PROMPT = (
    # Your original Bengali + English system prompt
    "আপনি বাংলাদেশের আইনভিত্তিক একটি লিগ্যাল চ্যাটবট। আপনার জ্ঞানভাণ্ডারে আইন/অ্যাক্ট, বিধি/রুলস, "
    "অধ্যাদেশ (Ordinance), সংশোধনী (Amendment), প্রজ্ঞাপন/গেজেট/সার্কুলার/নোটিফিকেশন, SRO/GO/RO ইত্যাদি থাকতে পারে। "
    "আপনি RAG চেইনের মাধ্যমে কনটেক্সট (context) পাবেন এবং শুধুমাত্র সেই নথি-উৎসের তথ্যের ভিত্তিতে উত্তর দেবেন—"
    "কল্পনানির্ভর বা অনুমান নির্ভর কিছু বলবেন না।\n\n"
    
    "নীতিমালা:\n"
    "1) হায়ারার্কি মান্য: আইন → অংশ → অধ্যায় → ধারা → উপধারা → দফা/উপদফা। যে তথ্য উদ্ধৃত করবেন, "
    "   সম্ভব হলে নির্দিষ্ট রেফারেন্স (আইনের নাম, ধারা/উপধারা/দফা নম্বর, গেজেট/প্রজ্ঞাপনের তারিখ ও নম্বর) দিন।\n"
    "2) উত্তরভাষা: ডিফল্টে **বাংলা** ভাষায় উত্তর দিন। শেষে ২—৩ লাইনের একটি **English summary** দিন। "
    "   যদি ব্যবহারকারী স্পষ্টভাবে ইংরেজি চান, তখন সম্পূর্ণ ইংরেজিতেই দিতে পারেন।\n"
    "3) নির্ভুলতা: প্রাসঙ্গিক অংশ না পেলে বলুন—'প্রদত্ত কনটেক্সটে সুনির্দিষ্ট রেফারেন্স পাওয়া যায়নি'। "
    "   প্রয়োজনে স্পষ্টীকরণ প্রশ্ন করুন (যেমন: আইন/বিধির সাল, ধারা নম্বর)।\n"
    
    "নিচের context শুধুমাত্র RAG থেকে এসেছে—উত্তর দেওয়ার সময় এটিই ব্যবহার করুন:\n"
    "{context}\n\n"
    
    "You are a Bangladesh Law Assistant. Answer based only on the provided context."
)

print("✅ System prompt configured")

✅ System prompt configured


In [7]:
# ==========================================
# Cell 5: LangGraph Nodes (NEW!)
# ==========================================

def retrieve_node(state: LegalRAGState) -> LegalRAGState:
    """
    Retrieval node - replaces the retriever.invoke() call
    """
    print(f"🔍 Retrieving documents for: {state['question']}")
    
    # Retrieve documents using the same retriever as original
    retrieved_docs = retriever.invoke(state["question"])
    
    # Create context string from documents
    context = "\n\n".join([doc.page_content for doc in retrieved_docs])
    
    print(f"📄 Retrieved {len(retrieved_docs)} documents")
    
    # Update state
    return {
        **state,
        "retrieved_docs": retrieved_docs,
        "context": context,
        "metadata": {
            "num_docs_retrieved": len(retrieved_docs),
            "retrieval_method": "similarity_search",
            "avg_doc_length": sum(len(doc.page_content) for doc in retrieved_docs) / len(retrieved_docs) if retrieved_docs else 0
        }
    }

def generate_node(state: LegalRAGState) -> LegalRAGState:
    """
    Generation node - replaces the LLM chain invoke
    """
    print("🤖 Generating answer using LLM...")
    
    # Create prompt with context (same as your original prompt structure)
    prompt = ChatPromptTemplate.from_messages([
        ("system", SYSTEM_PROMPT),
        ("human", "{question}")
    ])
    
    # Format prompt with context and question
    formatted_prompt = prompt.format_messages(
        context=state["context"],
        question=state["question"]
    )
    
    # Generate response using the same LLM
    response = llm.invoke(formatted_prompt)
    answer = response.content
    
    print("✅ Answer generated")
    
    # Add to conversation history
    messages = state.get("messages", [])
    messages.extend([
        HumanMessage(content=state["question"]),
        AIMessage(content=answer)
    ])
    
    return {
        **state,
        "answer": answer,
        "messages": messages,
        "metadata": {
            **state.get("metadata", {}),
            "answer_length": len(answer),
            "llm_model": "meta-llama/llama-4-scout-17b-16e-instruct"
        }
    }

print("✅ LangGraph nodes defined")

✅ LangGraph nodes defined


In [8]:
# ==========================================
# Cell 6: Build LangGraph Workflow
# ==========================================

def create_legal_rag_graph():
    """
    Create the LangGraph workflow - this replaces create_retrieval_chain()
    """
    
    # Create state graph
    workflow = StateGraph(LegalRAGState)
    
    # Add nodes (these replace the single retrieval chain)
    workflow.add_node("retrieve", retrieve_node)
    workflow.add_node("generate", generate_node)
    
    # Define the flow
    workflow.set_entry_point("retrieve")
    workflow.add_edge("retrieve", "generate")
    workflow.add_edge("generate", END)
    
    # Compile the graph
    app = workflow.compile()
    
    return app

# Create the graph (replaces rag_chain = create_retrieval_chain(...))
rag_graph = create_legal_rag_graph()
print("✅ LangGraph workflow created")

✅ LangGraph workflow created


In [9]:
# ==========================================
# Cell 7: Compare Original vs LangGraph Usage
# ==========================================

def ask_question_langgraph(question: str):
    """
    LangGraph version - replaces rag_chain.invoke()
    """
    initial_state = {
        "question": question,
        "retrieved_docs": [],
        "context": "",
        "answer": "",
        "metadata": {},
        "messages": []
    }
    
    # Run the graph
    result = rag_graph.invoke(initial_state)
    return result

# Example usage comparison
test_question = "কোম্পানি' বলতে কোন কোন সত্তা অন্তর্ভুক্ত?"

print("🧪 Testing LangGraph approach...")
print("=" * 60)

# LangGraph approach
langgraph_response = ask_question_langgraph(test_question)

print("📝 Answer:")
print(langgraph_response["answer"])
print("\n📊 Metadata:")
for key, value in langgraph_response["metadata"].items():
    print(f"  {key}: {value}")

🧪 Testing LangGraph approach...
🔍 Retrieving documents for: কোম্পানি' বলতে কোন কোন সত্তা অন্তর্ভুক্ত?
📄 Retrieved 5 documents
🤖 Generating answer using LLM...
✅ Answer generated
📝 Answer:
প্রদত্ত কনটেক্সটে "কোম্পানি" বলতে নিম্নলিখিত সত্তাসমূহ অন্তর্ভুক্ত:

১. যেকোনো কোম্পানি, 
২. ফার্ম, 
৩. ব্যক্তিসংঘ, 
৪. ট্রাস্ট, 
৫. তহবিল, 
৬. সন্ত্বা বা আইনের দ্বারা সৃষ্ট কৃত্রিম ব্যক্তিসত্তা।

📊 Metadata:
  num_docs_retrieved: 5
  retrieval_method: similarity_search
  avg_doc_length: 7430.0
  answer_length: 195
  llm_model: meta-llama/llama-4-scout-17b-16e-instruct


In [10]:
retrieved_docs = retriever.invoke(test_question)

print("This is the context:")
for idx, doc in enumerate(retrieved_docs, start=1):
    print(f"\nchunk:{idx}")
    print(doc.page_content)
    print("Metadata:", doc.metadata)

print("\n------THIS IS THE ANSWER----")
# print(answer)

This is the context:

chunk:1
কোনো কোম্পানির পরিচালক বা স্পন্সর শেয়ারহোল্ডার হইতে হইলে;

৩. আমদানি নিবন্ধন সনদ বা রপ্তানি নিবন্ধন সনদ প্রাপ্তি ও বহাল রাখিতে;

৪. সিটি কর্পোরেশন বা পৌরসভা এলাকায় ট্রেড লাইসেন্স প্রাপ্তি ও নবায়ন করিতে;

৫. সমবায় সমিতির নিবন্ধন পাইতে;

৬. সাধারণ বিমার তালিকাভুক্ত সার্ভেয়ার হইতে এবং লাইসেন্স প্রাপ্তি ও নবায়ন করিতে;


পৃষ্ঠা/Page 204 -------------------------------------------------- **৭.** সিটি কর্পোরেশন, পৌরসভা ও ক্যান্টনমেন্ট বোর্ড এলাকায় ১০ (দশ) লক্ষাধিক টাকার জমি, বিল্ডিং বা অ্যাপার্টমেন্ট বিক্রয় বা লিজ বা হস্তান্তর বা বায়নানামা বা আমমোক্তারনামা নিবন্ধন করিতে;

**৮.** ক্রেডিট কার্ড প্রাপ্তি ও বহাল রাখিতে;

**৯.** চিকিৎসক, দন্ত চিকিৎসক, আইনজীবী, চার্টার্ড অ্যাকাউন্টেন্ট, কস্ট এন্ড ম্যানেজমেন্ট অ্যাকাউন্টেন্ট, প্রকৌশলী, স্থপতি অথবা সার্ভেয়ার হিসাবে বা সমজাতীয় পেশাজীবী হিসাবে কোনো স্বীকৃত পেশাজীবী সংস্থার সদস্যপদ প্রাপ্তি ও বহাল রাখিতে;

**১০.** Muslim Marriages and Divorces (Registration) Act, 1974 (Act No. LII of 1974) এর অধীন নিকাহ্ রেজিস্ট্র

In [None]:


# ==========================================
# Cell 8: Advanced LangGraph Features
# ==========================================

def create_enhanced_legal_rag():
    """
    Enhanced version with conditional logic and error handling
    """
    
    def should_retrieve_more(state: LegalRAGState) -> str:
        """Conditional node to decide if we need more documents"""
        num_docs = len(state.get("retrieved_docs", []))
        
        if num_docs == 0:
            return "no_results"
        elif num_docs < 3:
            return "retrieve_more" 
        else:
            return "generate"
    
    def retrieve_more_node(state: LegalRAGState) -> LegalRAGState:
        """Retrieve with different parameters if first attempt was insufficient"""
        print("🔄 Retrieving more documents with relaxed parameters...")
        
        # Try MMR search with more documents
        mmr_retriever = vectorstore.as_retriever(
            search_type="mmr", 
            search_kwargs={'k': 10, 'fetch_k': 20}
        )
        
        additional_docs = mmr_retriever.invoke(state["question"])
        
        # Combine with existing docs, remove duplicates
        all_docs = state["retrieved_docs"] + additional_docs
        unique_docs = []
        seen_content = set()
        
        for doc in all_docs:
            if doc.page_content not in seen_content:
                unique_docs.append(doc)
                seen_content.add(doc.page_content)
        
        context = "\n\n".join([doc.page_content for doc in unique_docs])
        
        return {
            **state,
            "retrieved_docs": unique_docs,
            "context": context,
            "metadata": {
                **state.get("metadata", {}),
                "enhanced_retrieval": True,
                "total_docs_after_enhancement": len(unique_docs)
            }
        }
    
    def no_results_node(state: LegalRAGState) -> LegalRAGState:
        """Handle case when no documents are found"""
        return {
            **state,
            "answer": "প্রদত্ত প্রশ্নের জন্য আমাদের ডাটাবেসে কোনো প্রাসঙ্গিক আইনি তথ্য পাওয়া যায়নি। দয়া করে প্রশ্নটি আরো স্পষ্ট করুন বা ভিন্নভাবে জিজ্ঞাসা করুন।",
            "metadata": {
                **state.get("metadata", {}),
                "no_results": True
            }
        }
    
    # Build enhanced workflow
    workflow = StateGraph(LegalRAGState)
    
    # Add all nodes
    workflow.add_node("retrieve", retrieve_node)
    workflow.add_node("retrieve_more", retrieve_more_node)
    workflow.add_node("generate", generate_node)
    workflow.add_node("no_results", no_results_node)
    
    # Set entry point
    workflow.set_entry_point("retrieve")
    
    # Add conditional edges
    workflow.add_conditional_edges(
        "retrieve",
        should_retrieve_more,
        {
            "generate": "generate",
            "retrieve_more": "retrieve_more",
            "no_results": "no_results"
        }
    )
    
    # Add regular edges
    workflow.add_edge("retrieve_more", "generate")
    workflow.add_edge("generate", END)
    workflow.add_edge("no_results", END)
    
    return workflow.compile()

# Create enhanced version
enhanced_rag = create_enhanced_legal_rag()
print("✅ Enhanced LangGraph workflow created with conditional logic")

# ==========================================
# Cell 9: Test Enhanced Features
# ==========================================

def test_enhanced_rag():
    """Test the enhanced RAG with different types of questions"""
    
    test_cases = [
        {
            "question": "কোম্পানি' বলতে কোন কোন সত্তা অন্তর্ভুক্ত?",
            "expected": "Should find relevant documents"
        },
        {
            "question": "অসম্ভব আইনি প্রশ্ন যার কোনো উত্তর নেই",
            "expected": "Should trigger no_results handling"
        },
        {
            "question": "ট্রেড লাইসেন্স",
            "expected": "Might need enhanced retrieval"
        }
    ]
    
    for i, test_case in enumerate(test_cases, 1):
        print(f"\n🧪 Test Case {i}: {test_case['question']}")
        print(f"Expected: {test_case['expected']}")
        print("-" * 50)
        
        initial_state = {
            "question": test_case["question"],
            "retrieved_docs": [],
            "context": "",
            "answer": "",
            "metadata": {},
            "messages": []
        }
        
        result = enhanced_rag.invoke(initial_state)
        
        print(f"📝 Answer: {result['answer'][:200]}...")
        print(f"📊 Metadata: {result['metadata']}")
        print("-" * 50)

test_enhanced_rag()

# ==========================================
# Cell 10: Migration Summary and Benefits
# ==========================================

print("""
🚀 MIGRATION COMPLETE: LangChain Retrieval Chain → LangGraph

📈 BENEFITS OF LANGGRAPH APPROACH:

1. **Better State Management**: 
   - Clear state definition with TypedDict
   - Full visibility into intermediate steps
   - Easy debugging and monitoring

2. **Modular Architecture**:
   - Separate nodes for retrieval and generation
   - Easy to modify individual components
   - Better testing and maintenance

3. **Conditional Logic**:
   - Can add decision points in the workflow
   - Handle edge cases (no results, poor quality results)
   - Multi-step reasoning capabilities

4. **Enhanced Error Handling**:
   - Graceful fallbacks
   - Different retrieval strategies
   - Better user experience

5. **Extensibility**:
   - Easy to add new nodes (fact-checking, validation, etc.)
   - Support for parallel processing
   - Multi-turn conversation support

6. **Observability**:
   - Track metadata throughout the process
   - Performance monitoring
   - Better analytics

🎯 WHAT CHANGED:
- rag_chain.invoke() → rag_graph.invoke()
- Single chain → Multi-node workflow
- Simple input/output → Rich state management
- Linear flow → Conditional logic support

🔧 WHAT STAYED THE SAME:
- All original components (embeddings, vectorstore, LLM)
- System prompt and legal expertise
- Bengali/English language support
- Core RAG functionality
""")

# ==========================================
# Cell 11: Production Usage Pattern
# ==========================================

class BangladeshLegalAssistant:
    """
    Production-ready class wrapping the LangGraph RAG system
    """
    
    def __init__(self):
        self.rag_graph = create_enhanced_legal_rag()
        print("✅ Bangladesh Legal Assistant initialized with LangGraph")
    
    def ask(self, question: str) -> dict:
        """Ask a legal question"""
        initial_state = {
            "question": question,
            "retrieved_docs": [],
            "context": "",
            "answer": "",
            "metadata": {},
            "messages": []
        }
        
        return self.rag_graph.invoke(initial_state)
    
    def batch_ask(self, questions: List[str]) -> List[dict]:
        """Process multiple questions"""
        return [self.ask(q) for q in questions]
    
    def get_sources(self, response: dict) -> List[str]:
        """Extract sources from a response"""
        sources = []
        for doc in response.get("retrieved_docs", []):
            if hasattr(doc, 'metadata') and doc.metadata:
                sources.append(str(doc.metadata))
        return sources

# Initialize the assistant
assistant = BangladeshLegalAssistant()

# Example usage
final_question = "কোম্পানি আইনে পরিচালক নিয়োগের নিয়ম কী?"
final_response = assistant.ask(final_question)

print(f"❓ Question: {final_question}")
print(f"💡 Answer: {final_response['answer']}")
print(f"📚 Sources: {len(final_response.get('retrieved_docs', []))} documents retrieved")

print("\n🎉 LangGraph migration completed successfully!")