# ICC Enhanced RAG System - Production Deployment

**Architecture:**
- 🔍 **Enhanced Vector Search**: Dual-index retrieval with intelligent routing using `databricks-gte-large-en`
- 🧠 **Advanced LLM**: `databricks-meta-llama-3-3-70b-instruct` for legal analysis
- 🚀 **MLflow 3.0**: Production deployment and model management
- ⚖️ **Legal Expertise**: Specialized for ICC defense team research

**Data Sources:**
- **Past Judgments Index**: `past_judgement` (ICTY/ICC case law)
- **Geneva Documentation Index**: `geneva_documentation` (IHL framework)
- **Vector Search Endpoint**: `jgmt` (with databricks-gte-large-en embedding model)

**Key Features:**
- Intelligent routing based on legal topics
- Enhanced retrieval with relevance boosting
- Comprehensive legal analysis generation
- Production-ready MLflow 3.0 deployment


In [None]:
%pip install -U -qqqq mlflow>=3.1.1 langchain databricks-langchain pydantic databricks-agents unitycatalog-langchain[databricks] uv databricks-feature-engineering==0.12.1
dbutils.library.restartPython()


In [None]:
import json
import pandas as pd
import numpy as np
from typing import List, Dict, Any, Optional
from dataclasses import dataclass, asdict
import datetime
import logging
import re

import mlflow
from mlflow.models import infer_signature
from mlflow.models.resources import (
    DatabricksVectorSearchIndex,
    DatabricksServingEndpoint
)

# Vector Search and LLM
from databricks.vector_search.client import VectorSearchClient
from databricks.sdk import WorkspaceClient
from langchain_community.chat_models import ChatDatabricks
from langchain.schema import HumanMessage, SystemMessage
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferWindowMemory

print("✅ Enhanced RAG dependencies loaded")


## Enhanced Configuration & Legal Topics


In [None]:
# DEPRECATED: superseded by the "Enhanced Configuration" cell that follows.
# That cell re-assigns every name defined here with the same values, so running
# this cell has no lasting effect once the next one has executed.
# Enhanced Configuration - Updated Models
VECTOR_SEARCH_ENDPOINT = "jgmt"  # Uses databricks-gte-large-en for semantic search
PAST_JUDGMENTS_INDEX = "past_judgement"
GENEVA_DOCUMENTATION_INDEX = "geneva_documentation"
LLM_MODEL_ENDPOINT = "databricks-meta-llama-3-3-70b-instruct"  # Conversational LLM

# Search parameters
DEFAULT_TOP_K = 10
MAX_CONTEXT_LENGTH = 4000
SIMILARITY_THRESHOLD = 0.7
MAX_TOKENS = 2048
TEMPERATURE = 0.1

# Legal topics for intelligent routing.
# Each phrase is matched as a lowercase substring of the question; the list with
# more matching phrases decides which index is searched.
LEGAL_TOPICS = {
    "judgment_priority": [
        "overall control", "state", "protected persons", "active participation", "direct participation",
        "combatant status", "combatant privilege", "civilian status", "duty to protect",
        "organisation of armed groups", "principle of distinction", "indiscriminate attack",
        "civilian population", "military objectives", "military objects", "rule of proportionality",
        "principle of proportionality", "collateral damage", "military necessity",
        "military imperative", "security of civilians", "imperative military reasons",
        "conduct of hostilities", "means of warfare", "methods of warfare",
        "attacks against protected objects", "religious buildings", "displacement",
        "deportation", "coercion", "cruel treatment", "torture", "outrages against dignity",
        "murder", "self-defense", "causal link", "checkpoints", "roadblocks",
        "icty", "trial chamber", "appeals chamber", "judgment", "applied", "practice"
    ],
    "geneva_priority": [
        "geneva convention", "international humanitarian law", "ihl", "protected persons",
        "wounded and sick", "prisoners of war", "civilians", "medical personnel",
        "religious personnel", "cultural property", "distinctive emblems", "red cross",
        "red crescent", "additional protocol", "grave breaches", "serious violations",
        "customary international law", "treaty law", "convention", "protocol"
    ]
}

print("✅ Enhanced configuration loaded with legal topics and updated models")


In [None]:
# Enhanced Configuration
# Module-level constants read by EnhancedICCRAGSystem and the helper functions
# defined in later cells.
VECTOR_SEARCH_ENDPOINT = "jgmt"  # endpoint name passed to VectorSearchClient.get_index
PAST_JUDGMENTS_INDEX = "past_judgement"  # index queried by search_past_judgments
GENEVA_DOCUMENTATION_INDEX = "geneva_documentation"  # index queried by search_geneva_documentation
LLM_MODEL_ENDPOINT = "databricks-meta-llama-3-3-70b-instruct"  # endpoint passed to ChatDatabricks

# Search parameters
DEFAULT_TOP_K = 10  # default number of hits requested per index by the search methods
MAX_CONTEXT_LENGTH = 4000  # NOTE(review): not referenced by any code visible in this section - confirm it is enforced elsewhere
SIMILARITY_THRESHOLD = 0.7  # retrieve_context keeps only results with score >= this value
MAX_TOKENS = 2048  # max_tokens passed to the chat model
TEMPERATURE = 0.1  # temperature passed to the chat model

# Legal topics for intelligent routing.
# determine_routing_priority lowercases the question and counts how many phrases
# from each list occur in it as substrings; the larger count picks the index.
# "protected persons" appears in both lists, so it adds one to each count.
LEGAL_TOPICS = {
    "judgment_priority": [
        "overall control", "state", "protected persons", "active participation", "direct participation",
        "combatant status", "combatant privilege", "civilian status", "duty to protect",
        "organisation of armed groups", "principle of distinction", "indiscriminate attack",
        "civilian population", "military objectives", "military objects", "rule of proportionality",
        "principle of proportionality", "collateral damage", "military necessity",
        "military imperative", "security of civilians", "imperative military reasons",
        "conduct of hostilities", "means of warfare", "methods of warfare",
        "attacks against protected objects", "religious buildings", "displacement",
        "deportation", "coercion", "cruel treatment", "torture", "outrages against dignity",
        "murder", "self-defense", "causal link", "checkpoints", "roadblocks",
        "icty", "trial chamber", "appeals chamber", "judgment", "applied", "practice"
    ],
    "geneva_priority": [
        "geneva convention", "international humanitarian law", "ihl", "protected persons",
        "wounded and sick", "prisoners of war", "civilians", "medical personnel",
        "religious personnel", "cultural property", "distinctive emblems", "red cross",
        "red crescent", "additional protocol", "grave breaches", "serious violations",
        "customary international law", "treaty law", "convention", "protocol"
    ]
}

print("✅ Enhanced configuration loaded with legal topics")


## Enhanced Data Structures


In [None]:
@dataclass
class SearchResult:
    """One retrieved chunk from either vector index, with its metadata.

    Instances are built by the ``search_past_judgments`` and
    ``search_geneva_documentation`` methods. The optional fields default to
    ``None`` and are only filled by the search method that has the
    corresponding column.
    """
    # Chunk text (the index's "text" column)
    content: str
    # Summary stored alongside the chunk (the "summary" column)
    summary: str
    # Originating document: "source_file" for judgments, "doc_name" for Geneva documents
    source: str
    # Extra per-hit fields; always carries "score" and "section", plus
    # "document_type" (judgments) or "article"/"article_type" (Geneva)
    metadata: Dict[str, Any]
    # Similarity score reported by the vector search
    score: float
    source_type: str  # 'judgment' or 'geneva'
    # First entry of the hit's "pages" column, when present
    page_number: Optional[int] = None
    # Article identifier; set by the Geneva search only
    article: Optional[str] = None
    # Section label; set by both searches
    section: Optional[str] = None
    # Document type; set by the judgment search only
    document_type: Optional[str] = None

@dataclass
class RetrievalContext:
    """Everything ``retrieve_context`` gathered for a single question."""
    # The question as supplied by the caller (before query enhancement)
    question: str
    # "judgment", "geneva" or "both", as returned by determine_routing_priority
    routing_decision: str
    # Raw hits from the past-judgments index (not threshold-filtered)
    judgment_results: List[SearchResult]
    # Raw hits from the Geneva documentation index (not threshold-filtered)
    geneva_results: List[SearchResult]
    # Combined hits, sorted by descending score and filtered by SIMILARITY_THRESHOLD
    all_results: List[SearchResult]
    # len(all_results)
    total_sources: int
    # Wall-clock retrieval time in seconds
    processing_time: float
    
@dataclass
class LegalAnalysis:
    """Structured result returned by ``generate_legal_analysis``."""
    # The question that was analysed
    question: str
    # LLM response text, or an "Error generating legal analysis: ..." message on failure
    analysis: str
    # The threshold-filtered results taken from RetrievalContext.all_results
    sources_used: List[SearchResult]
    # Up to five lines of the analysis picked out by keyword matching
    key_findings: List[str]
    # Citation-like strings (Article/Page/Section/case numbers) found by regex
    citations: List[str]
    # Heuristic derived from source scores and count, capped at 1.0; 0.0 on failure
    confidence_score: float
    # Wall-clock generation time in seconds
    processing_time: float

print("✅ Enhanced data structures defined")


## Enhanced RAG System Core


In [None]:
class EnhancedICCRAGSystem:
    """Enhanced ICC RAG system with intelligent routing and legal expertise.

    The class body holds client setup, index routing and query expansion.
    The search, retrieval and analysis methods are attached to the class by
    helper functions that run in later notebook cells.
    """

    def __init__(self):
        """Create the vector-search, workspace and chat-model clients."""
        # Initialize clients
        self.vsc = VectorSearchClient()
        self.w = WorkspaceClient()
        self.llm = ChatDatabricks(
            target_uri="databricks",
            endpoint=LLM_MODEL_ENDPOINT,
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS
        )

        # Conversation memory, keyed by conversation_id
        self.conversations = {}

        # Legal terminology for query enhancement: trigger phrase -> related phrases
        self.legal_expansions = {
            "war crimes": ["war crime", "violations of laws of war", "grave breaches"],
            "crimes against humanity": ["crime against humanity", "systematic attack", "persecution"],
            "persecution": ["persecute", "persecuted", "discriminatory acts", "discriminatory intent"],
            "murder": ["kill", "killing", "unlawful killing", "wilful killing"],
            "active participation": ["direct participation", "hostilities", "combatant status"],
            "civilian status": ["protected person", "civilian population", "non-combatant"],
            "combatant status": ["combatant privilege", "armed forces", "military objective"]
        }

    def determine_routing_priority(self, question: str) -> str:
        """Decide which index to search for ``question``.

        Matching is plain lowercase substring matching.

        Args:
            question: The user's research question.

        Returns:
            ``"judgment"``, ``"geneva"`` or ``"both"``. Explicit mentions of
            ICTY/trial/appeal win first, then Geneva/convention; otherwise the
            topic list with more matches wins, and a tie (including no matches
            at all) searches both indexes.
        """
        question_lower = question.lower()

        # Explicit source mentions override topic counting
        if any(marker in question_lower for marker in ("icty", "trial", "appeal")):
            return "judgment"
        if "geneva" in question_lower or "convention" in question_lower:
            return "geneva"

        # Count matches for each topic category
        judgment_matches = sum(
            1 for topic in LEGAL_TOPICS["judgment_priority"] if topic in question_lower
        )
        geneva_matches = sum(
            1 for topic in LEGAL_TOPICS["geneva_priority"] if topic in question_lower
        )

        if judgment_matches > geneva_matches:
            return "judgment"
        if geneva_matches > judgment_matches:
            return "geneva"
        return "both"  # Tie or no indicators: search both indexes

    def enhance_query(self, query: str) -> str:
        """Return the lowercased query with related legal phrases appended.

        For every key of ``self.legal_expansions`` that occurs in the query,
        the first two expansion phrases are appended. Terms are matched against
        the original query only, so an appended phrase can never trigger a
        further expansion and the result does not depend on dictionary order.

        Args:
            query: The user's research question.

        Returns:
            The lowercased query followed by any expansion phrases, separated
            by single spaces.
        """
        base = query.lower()
        additions = [
            " ".join(expansions[:2])
            for term, expansions in self.legal_expansions.items()
            if term in base
        ]
        return " ".join([base, *additions])

print("✅ Enhanced ICC RAG System core defined")


In [None]:
# Add search methods to the EnhancedICCRAGSystem class
def vector_search_rows(response) -> List[Dict[str, Any]]:
    """Turn a vector-search response into a list of ``{column: value}`` dicts.

    ``similarity_search`` returns a dict shaped like
    ``{"manifest": {"columns": [{"name": ...}, ...]},
    "result": {"data_array": [[...], ...]}}``, where each row lists values in
    manifest-column order. Iterating that dict directly yields its string
    keys, not rows, so it must be unpacked before use.

    Args:
        response: The object returned by ``similarity_search``. A list of
            dicts is passed through unchanged; ``None`` yields ``[]``.

    Returns:
        One dict per hit, keyed by column name. Empty when there are no hits.
    """
    if not isinstance(response, dict):
        # Already row-shaped (or empty): keep only mapping rows
        return [row for row in (response or []) if isinstance(row, dict)]

    columns = (response.get("manifest") or {}).get("columns") or []
    names = [column.get("name") for column in columns]
    data = (response.get("result") or {}).get("data_array") or []
    return [dict(zip(names, row)) for row in data]


def add_search_methods_to_rag_system():
    """Attach the two index-search methods to ``EnhancedICCRAGSystem``.

    Both methods are best-effort: any failure is printed and an empty list is
    returned, so callers can keep going with whatever the other index found.
    """

    def _first_page(pages):
        """Return the first entry of a page list, or the value itself if scalar."""
        # NOTE(review): assumes "pages" is a list of page numbers; confirm against the index schema
        if isinstance(pages, (list, tuple)):
            return pages[0] if pages else None
        return pages if pages not in ("", None) else None

    def search_past_judgments(self, query: str, top_k: int = DEFAULT_TOP_K) -> List[SearchResult]:
        """Search the past-judgments index.

        Args:
            query: Text to search for.
            top_k: Maximum number of hits to request.

        Returns:
            Hits as ``SearchResult`` objects with ``source_type="judgment"``,
            or ``[]`` if the search fails.
        """
        try:
            response = self.vsc.get_index(VECTOR_SEARCH_ENDPOINT, PAST_JUDGMENTS_INDEX).similarity_search(
                query_text=query,
                columns=["text", "summary", "source_file", "document_type", "section", "pages"],
                num_results=int(top_k)  # callers may pass a float coming from a DataFrame
            )

            search_results = []
            for row in vector_search_rows(response):
                score = row.get("score") or 0.0
                section = row.get("section") or ""
                document_type = row.get("document_type") or ""

                search_results.append(SearchResult(
                    content=row.get("text") or "",
                    summary=row.get("summary") or "",
                    source=row.get("source_file") or "Unknown",
                    metadata={
                        "document_type": document_type,
                        "section": section,
                        "score": score
                    },
                    score=score,
                    source_type="judgment",
                    page_number=_first_page(row.get("pages")),
                    section=section,
                    document_type=document_type
                ))

            return search_results
        except Exception as e:
            print(f"Error searching past judgments: {e}")
            return []

    def search_geneva_documentation(self, query: str, top_k: int = DEFAULT_TOP_K) -> List[SearchResult]:
        """Search the Geneva documentation index.

        Args:
            query: Text to search for.
            top_k: Maximum number of hits to request.

        Returns:
            Hits as ``SearchResult`` objects with ``source_type="geneva"``,
            or ``[]`` if the search fails.
        """
        try:
            response = self.vsc.get_index(VECTOR_SEARCH_ENDPOINT, GENEVA_DOCUMENTATION_INDEX).similarity_search(
                query_text=query,
                columns=["text", "summary", "doc_name", "article", "section", "article_type", "pages"],
                num_results=int(top_k)  # callers may pass a float coming from a DataFrame
            )

            search_results = []
            for row in vector_search_rows(response):
                score = row.get("score") or 0.0
                article = row.get("article") or ""
                section = row.get("section") or ""

                search_results.append(SearchResult(
                    content=row.get("text") or "",
                    summary=row.get("summary") or "",
                    source=row.get("doc_name") or "Unknown",
                    metadata={
                        "article": article,
                        "section": section,
                        "article_type": row.get("article_type") or "",
                        "score": score
                    },
                    score=score,
                    source_type="geneva",
                    page_number=_first_page(row.get("pages")),
                    article=article,
                    section=section
                ))

            return search_results
        except Exception as e:
            print(f"Error searching Geneva documentation: {e}")
            return []

    # Add methods to the class
    EnhancedICCRAGSystem.search_past_judgments = search_past_judgments
    EnhancedICCRAGSystem.search_geneva_documentation = search_geneva_documentation

    print("✅ Search methods added to Enhanced ICC RAG System")

# Attach search_past_judgments / search_geneva_documentation to EnhancedICCRAGSystem.
# Must run before any instance calls those methods.
add_search_methods_to_rag_system()


In [None]:
# Add retrieval and analysis methods
def add_retrieval_methods_to_rag_system():
    """Attach the retrieval, prompting and post-processing methods to ``EnhancedICCRAGSystem``.

    Attached methods: ``retrieve_context``, ``generate_legal_analysis``,
    ``_format_contexts_for_analysis``, ``_extract_key_findings``,
    ``_extract_citations`` and ``_calculate_confidence_score``.
    """

    def retrieve_context(self, question: Optional[str] = None, top_k: int = 5, *,
                         query: Optional[str] = None) -> RetrievalContext:
        """Retrieve supporting passages for a question, routed to the relevant index(es).

        Args:
            question: The research question.
            top_k: Number of hits to request from each searched index.
            query: Keyword-only alias for ``question``, accepted because the
                serving wrapper calls this method with ``query=...``.

        Returns:
            A ``RetrievalContext`` holding the raw per-index hits plus the
            combined list, sorted by descending score and filtered by
            ``SIMILARITY_THRESHOLD``.

        Raises:
            TypeError: If neither ``question`` nor ``query`` is supplied.
        """
        if question is None:
            question = query
        if question is None:
            raise TypeError("retrieve_context() missing required argument: 'question'")

        start_time = datetime.datetime.now()

        print(f"🔍 Retrieving context for: '{question[:100]}...'")

        # Determine routing priority
        routing_decision = self.determine_routing_priority(question)
        print(f"📊 Routing decision: {routing_decision}")

        # Enhance query
        enhanced_query = self.enhance_query(question)

        judgment_results = []
        geneva_results = []

        # Search based on routing decision
        if routing_decision in ["judgment", "both"]:
            judgment_results = self.search_past_judgments(enhanced_query, top_k)
            print(f"⚖️ Found {len(judgment_results)} judgment results")

        if routing_decision in ["geneva", "both"]:
            geneva_results = self.search_geneva_documentation(enhanced_query, top_k)
            print(f"📜 Found {len(geneva_results)} Geneva Convention results")

        # Combine and sort all results by score
        all_results = judgment_results + geneva_results
        all_results.sort(key=lambda x: x.score, reverse=True)

        # Filter by similarity threshold
        filtered_results = [r for r in all_results if r.score >= SIMILARITY_THRESHOLD]

        processing_time = (datetime.datetime.now() - start_time).total_seconds()

        return RetrievalContext(
            question=question,
            routing_decision=routing_decision,
            judgment_results=judgment_results,
            geneva_results=geneva_results,
            all_results=filtered_results,
            total_sources=len(filtered_results),
            processing_time=processing_time
        )

    def generate_legal_analysis(self, question: str, context: RetrievalContext,
                               conversation_id: Optional[str] = None) -> LegalAnalysis:
        """Generate a legal analysis of ``question`` from the retrieved context.

        Args:
            question: The research question.
            context: Output of ``retrieve_context`` for this question.
            conversation_id: When given, prior turns stored under this id are
                included in the prompt and this turn is appended afterwards.

        Returns:
            A ``LegalAnalysis``. If prompt formatting or the LLM call fails,
            the error text is placed in ``analysis`` and the confidence score
            is ``0.0``; no exception is raised.
        """
        start_time = datetime.datetime.now()

        # Get or create conversation memory
        if conversation_id:
            if conversation_id not in self.conversations:
                self.conversations[conversation_id] = ConversationBufferWindowMemory(
                    k=5, return_messages=True
                )
            memory = self.conversations[conversation_id]
            # Read through the memory API so the k-turn window is applied;
            # chat_memory.messages would return the full, unbounded history.
            history = memory.load_memory_variables({})[memory.memory_key]
        else:
            history = []

        # Format contexts for LLM
        context_text = self._format_contexts_for_analysis(context)

        # Create specialized legal system prompt
        system_prompt = """You are an expert legal analyst specializing in International Criminal Court (ICC) and International Criminal Tribunal for the former Yugoslavia (ICTY) proceedings. You are conducting comprehensive legal research for the ICC defense team.

Your expertise includes:
- International criminal law (war crimes, crimes against humanity, genocide)
- ICTY and ICC procedures and legal standards
- International humanitarian law (IHL) and Geneva Conventions
- Legal reasoning and evidence evaluation
- Combatant status, civilian status, and participation in hostilities

Guidelines for your analysis:
1. Base responses strictly on the provided legal context from judgments and conventions
2. Use proper legal terminology and cite specific sections, articles, and page numbers
3. Maintain judicial objectivity and professional legal analysis
4. Provide comprehensive analysis with clear legal reasoning
5. Extract and cite specific paragraphs when requested
6. Identify key legal principles and their application
7. Clearly distinguish between different legal sources (ICTY judgments vs Geneva Conventions)
8. When analyzing subjective vs objective assessments, clearly identify the approach used

Structure your response with:
- Clear legal analysis based on the provided context
- Specific citations with page numbers and sections
- Key findings and legal principles
- Relevant case law and precedents
- Professional legal reasoning

Always provide the full paragraphs when specifically requested and ensure all citations are accurate."""

        # Create the prompt
        chat_template = ChatPromptTemplate.from_messages([
            ("system", system_prompt),
            MessagesPlaceholder(variable_name="history"),
            ("human", """
Legal Research Context:
{context}

Legal Research Question: {query}

Please provide a comprehensive legal analysis based on the provided context. If the question requests specific paragraphs, provide them in full with proper citations. Analyze the legal principles, identify key findings, and provide professional legal reasoning.

For questions about subjective vs objective assessment, clearly identify which approach the chamber used and explain the legal reasoning behind it.
""")
        ])

        try:
            # Format messages
            messages = chat_template.format_messages(
                context=context_text,
                query=question,
                history=history
            )

            # Generate response (invoke replaces the deprecated direct call on the model)
            response = self.llm.invoke(messages)

            # Update memory if conversation_id provided
            if conversation_id:
                memory.chat_memory.add_user_message(question)
                memory.chat_memory.add_ai_message(response.content)

            # Extract key findings and citations
            key_findings = self._extract_key_findings(response.content)
            citations = self._extract_citations(response.content)

            # Calculate confidence score based on source quality
            confidence_score = self._calculate_confidence_score(context.all_results)

            processing_time = (datetime.datetime.now() - start_time).total_seconds()

            return LegalAnalysis(
                question=question,
                analysis=response.content,
                sources_used=context.all_results,
                key_findings=key_findings,
                citations=citations,
                confidence_score=confidence_score,
                processing_time=processing_time
            )

        except Exception as e:
            # Best-effort: report the failure inside the result instead of raising
            return LegalAnalysis(
                question=question,
                analysis=f"Error generating legal analysis: {str(e)}",
                sources_used=context.all_results,
                key_findings=[],
                citations=[],
                confidence_score=0.0,
                processing_time=(datetime.datetime.now() - start_time).total_seconds()
            )

    def _format_contexts_for_analysis(self, context: RetrievalContext) -> str:
        """Render the threshold-filtered results as prompt text, grouped by source type.

        Only ``context.all_results`` is used, so the passages shown to the LLM
        are the same ones reported as ``sources_used`` and used for the
        confidence score.

        Returns:
            The formatted context, or a fixed "no context" sentence when
            nothing passed the threshold.
        """
        if not context.all_results:
            return "No relevant legal context found."

        judgment_hits = [r for r in context.all_results if r.source_type == "judgment"]
        geneva_hits = [r for r in context.all_results if r.source_type == "geneva"]

        formatted = []

        # Add judgment results
        if judgment_hits:
            formatted.append("=== ICTY/ICC JUDGMENTS ===")
            for i, result in enumerate(judgment_hits, 1):
                formatted.append(f"""
**Judgment Source {i}** - {result.document_type}
- **Document**: {result.source}
- **Section**: {result.section}
- **Pages**: {result.page_number if result.page_number is not None else 'N/A'}
- **Relevance Score**: {result.score:.3f}

**Content**:
{result.content}

**Summary**: {result.summary}
---""")

        # Add Geneva Convention results
        if geneva_hits:
            formatted.append("\n=== GENEVA CONVENTIONS ===")
            for i, result in enumerate(geneva_hits, 1):
                formatted.append(f"""
**Geneva Source {i}** - {result.metadata.get('article_type', 'N/A')}
- **Document**: {result.source}
- **Article**: {result.article}
- **Section**: {result.section}
- **Pages**: {result.page_number if result.page_number is not None else 'N/A'}
- **Relevance Score**: {result.score:.3f}

**Content**:
{result.content}

**Summary**: {result.summary}
---""")

        return "\n".join(formatted)

    def _extract_key_findings(self, analysis: str) -> List[str]:
        """Return up to five lines of the analysis that look like findings.

        A line qualifies when it contains one of a few keywords
        (case-insensitive) and is longer than 20 characters once stripped.
        """
        # Simple extraction - in production, use more sophisticated NLP
        keywords = ('finding', 'principle', 'rule', 'established', 'determined')
        findings = []
        for line in analysis.split('\n'):
            stripped = line.strip()
            lowered = line.lower()
            if len(stripped) > 20 and any(keyword in lowered for keyword in keywords):
                findings.append(stripped)
        return findings[:5]  # Top 5 findings

    def _extract_citations(self, analysis: str) -> List[str]:
        """Return the distinct citation-like strings found in the analysis.

        Matching is case-insensitive. Results are grouped by pattern and keep
        first-seen order within each group, so the output is deterministic.
        """
        # Extract patterns like "Article X", "Page Y", "Section Z"
        citation_patterns = [
            r'Article\s+\d+[a-zA-Z]?',
            r'Page\s+\d+',
            r'Section\s+[A-Z0-9]+',
            r'ICC-[0-9]+-[0-9]+',
            r'ICTY-[0-9]+-[0-9]+'
        ]

        citations = []
        for pattern in citation_patterns:
            citations.extend(re.findall(pattern, analysis, re.IGNORECASE))

        # dict.fromkeys removes duplicates while preserving order (a set would not)
        return list(dict.fromkeys(citations))

    def _calculate_confidence_score(self, sources: List[SearchResult]) -> float:
        """Return a heuristic confidence value, capped at 1.0.

        The value is the mean source score plus a bonus of ``len(sources) / 10``
        capped at 0.3. It is ``0.0`` when there are no sources.
        """
        if not sources:
            return 0.0

        # Base score on average relevance and number of sources
        avg_score = sum(s.score for s in sources) / len(sources)
        source_bonus = min(len(sources) / 10, 0.3)  # Bonus for more sources, capped at 0.3

        return min(avg_score + source_bonus, 1.0)

    # Add methods to the class
    EnhancedICCRAGSystem.retrieve_context = retrieve_context
    EnhancedICCRAGSystem.generate_legal_analysis = generate_legal_analysis
    EnhancedICCRAGSystem._format_contexts_for_analysis = _format_contexts_for_analysis
    EnhancedICCRAGSystem._extract_key_findings = _extract_key_findings
    EnhancedICCRAGSystem._extract_citations = _extract_citations
    EnhancedICCRAGSystem._calculate_confidence_score = _calculate_confidence_score

    print("✅ Retrieval and analysis methods added to Enhanced ICC RAG System")

# Attach retrieve_context, generate_legal_analysis and their private helpers to
# EnhancedICCRAGSystem. Must run before any instance calls those methods.
add_retrieval_methods_to_rag_system()


## Test Legal Research Questions


In [None]:
# Test the Enhanced RAG System with complex legal research questions
def test_enhanced_rag_system():
    """Run three ICTY research questions end-to-end and print a report.

    For each question the function retrieves context, generates an analysis
    and prints the routing, confidence, findings and citations. It then prints
    a one-line summary per question.

    Returns:
        A list with one metrics dict per question (routing decision, source
        count, confidence, output sizes and total processing time).
    """
    system = EnhancedICCRAGSystem()

    # Each case: the question, the routing it should trigger, and its main topics
    cases = [
        {
            "question": "Can you please go through all the ICTY trial judgments and appeal judgments and identify where the chamber discusses the status of an individual during the conflict. In particular, please identify all relevant paragraphs where the chamber refers to the active or direct participation of the individual or where the chamber discusses the civilian status or combatant status of an individual. Please provide the direct paragraph in full.",
            "expected_routing": "judgment",
            "key_topics": ["active participation", "direct participation", "civilian status", "combatant status", "ICTY", "trial judgments", "appeal judgments"],
        },
        {
            "question": "Can you please go through all the ICTY trial judgments and appeal judgments and identify which factors the Trial or Appeals Chamber relied on in order to assess whether an individual is actively or directly participating in hostilities at a particular point? Please provide the full paragraph and citations",
            "expected_routing": "judgment",
            "key_topics": ["factors", "assessment", "actively participating", "directly participating", "hostilities", "Trial Chamber", "Appeals Chamber", "citations"],
        },
        {
            "question": "Can you please search through all the ICTY trial judgments and appeal judgments and identify relevant paragraphs which would support the proposition that an individual who has previously joined enemy forces and is armed at the relevant point is considered to have lost their protected status at a particular point? Please determine whether the chamber undertakes a subjective or objective assessment?",
            "expected_routing": "judgment",
            "key_topics": ["enemy forces", "armed", "protected status", "subjective assessment", "objective assessment", "lost status"],
        },
    ]

    hash_rule = "#" * 80

    print("🧪 TESTING ENHANCED ICC RAG SYSTEM")
    print("=" * 80)

    collected = []

    for number, case in enumerate(cases, start=1):
        question_text = case["question"]
        expected = case["expected_routing"]

        # Per-question header
        print(f"\n{hash_rule}")
        print(f"LEGAL RESEARCH QUESTION {number}")
        print(hash_rule)
        print(f"Question: {question_text[:150]}...")
        print(f"Expected routing: {expected}")
        print("Key topics: " + ", ".join(case["key_topics"]))

        # Retrieval, then generation in a per-question conversation
        retrieved = system.retrieve_context(question_text, top_k=8)
        legal = system.generate_legal_analysis(
            question_text,
            retrieved,
            conversation_id=f"test_session_{number}",
        )

        findings = legal.key_findings
        cites = legal.citations
        body = legal.analysis

        print("\n📊 ROUTING ANALYSIS:")
        print(f"Expected: {expected}")
        print(f"Actual: {retrieved.routing_decision}")
        print(f"Sources found: {retrieved.total_sources}")
        print(f"Processing time: {retrieved.processing_time:.2f}s")

        print("\n⚖️ LEGAL ANALYSIS:")
        print(f"Confidence score: {legal.confidence_score:.3f}")
        print(f"Key findings: {len(findings)}")
        print(f"Citations: {len(cites)}")
        print(f"Analysis length: {len(body)} characters")

        # Show at most the first 500 characters of the analysis
        print("\n📝 ANALYSIS PREVIEW:")
        if len(body) > 500:
            print(body[:500] + "...")
        else:
            print(body)

        print("\n🔍 KEY FINDINGS:")
        for position, finding in enumerate(findings[:3], start=1):
            print(f"{position}. {finding}")

        print("\n📚 CITATIONS:")
        for position, citation in enumerate(cites[:5], start=1):
            print(f"{position}. {citation}")

        collected.append({
            "question_id": number,
            "question": question_text,
            "routing_decision": retrieved.routing_decision,
            "sources_found": retrieved.total_sources,
            "confidence_score": legal.confidence_score,
            "analysis_length": len(body),
            "key_findings_count": len(findings),
            "citations_count": len(cites),
            "processing_time": retrieved.processing_time + legal.processing_time,
        })

        print(f"\n{hash_rule}\n")

    # Summary
    print("📊 TEST SUMMARY")
    print("=" * 50)
    for row in collected:
        print(
            f"Question {row['question_id']}: {row['routing_decision']} routing, "
            f"{row['sources_found']} sources, {row['confidence_score']:.3f} confidence, "
            f"{row['processing_time']:.2f}s"
        )

    return collected

# Run the test. This constructs EnhancedICCRAGSystem and issues vector-search
# and LLM requests; the per-question metrics are kept in test_results.
test_results = test_enhanced_rag_system()


## MLflow 3.0 Production Model


In [None]:
class EnhancedICCRAGModel(mlflow.pyfunc.PythonModel):
    """MLflow 3.0 production model wrapper for Enhanced ICC RAG System.

    The model consumes a DataFrame with a required ``query`` column and two
    optional columns, ``num_results`` and ``conversation_id``. It returns one
    result dict per input row.
    """

    # Number of passages to retrieve when a row does not supply ``num_results``.
    DEFAULT_NUM_RESULTS = 8
    # Cap on the number of source records echoed back in each response.
    MAX_SOURCES_RETURNED = 10

    def load_context(self, context):
        """Initialize the enhanced RAG system."""
        self.rag_system = EnhancedICCRAGSystem()

    @staticmethod
    def _is_missing(value) -> bool:
        """Return True when *value* is None/NaN/NA (a missing DataFrame cell)."""
        try:
            return bool(pd.isna(value))
        except (TypeError, ValueError):
            # Non-scalar cells (lists, arrays) are not "missing" values.
            return False

    @classmethod
    def _optional_column(cls, model_input: pd.DataFrame, column: str) -> list:
        """Return *column* as a list with missing cells replaced by None.

        When the column is absent from the request, a list of ``None`` of the
        same length as the input is returned so callers can always ``zip`` it.
        """
        if column not in model_input.columns:
            return [None] * len(model_input)
        return [
            None if cls._is_missing(value) else value
            for value in model_input[column].tolist()
        ]

    def predict(self, context, model_input: pd.DataFrame) -> List[Dict]:
        """Handle predictions for serving endpoint.

        Args:
            context: MLflow model context (unused here).
            model_input: DataFrame with a ``query`` column and, optionally,
                ``num_results`` and ``conversation_id`` columns.

        Returns:
            One dict per input row. A row that fails is reported with
            ``routing_decision == "error"`` and the message in ``analysis``;
            a request-level failure (e.g. missing ``query`` column) yields
            ``{"error": ...}`` for every row.
        """
        try:
            queries = model_input["query"].tolist()

            # Optional parameters: tolerate both a missing column and missing cells.
            num_results_list = self._optional_column(model_input, "num_results")
            conversation_ids = self._optional_column(model_input, "conversation_id")

            results = []
            for query, num_results, conv_id in zip(queries, num_results_list, conversation_ids):
                try:
                    # A column containing NaN is float-typed, so coerce to int
                    # before handing the value to the retriever.
                    top_k = (
                        self.DEFAULT_NUM_RESULTS
                        if num_results is None
                        else int(num_results)
                    )

                    # Retrieve context (named `retrieved` so the MLflow
                    # `context` argument is not shadowed).
                    retrieved = self.rag_system.retrieve_context(
                        query=query,
                        top_k=top_k
                    )

                    # Generate legal analysis
                    analysis = self.rag_system.generate_legal_analysis(
                        question=query,
                        context=retrieved,
                        conversation_id=conv_id
                    )

                    # Format response
                    result = {
                        "question": query,
                        "analysis": analysis.analysis,
                        "routing_decision": retrieved.routing_decision,
                        "sources_used": len(analysis.sources_used),
                        "confidence_score": analysis.confidence_score,
                        "key_findings": analysis.key_findings,
                        "citations": analysis.citations,
                        "processing_time_seconds": retrieved.processing_time + analysis.processing_time,
                        "conversation_id": conv_id,
                        "sources": [
                            {
                                "source": s.source,
                                "source_type": s.source_type,
                                "section": s.section,
                                "page_number": s.page_number,
                                "article": s.article,
                                "relevance_score": round(s.score, 3)
                            }
                            for s in analysis.sources_used[:self.MAX_SOURCES_RETURNED]
                        ]
                    }
                    results.append(result)

                except Exception as e:
                    # Keep the batch going: one bad query must not fail the rest,
                    # but record the traceback so the failure is diagnosable.
                    logging.getLogger(__name__).exception(
                        "Error processing query %r", query
                    )
                    error_result = {
                        "question": query,
                        "analysis": f"Error processing query: {str(e)}",
                        "routing_decision": "error",
                        "sources_used": 0,
                        "confidence_score": 0.0,
                        "key_findings": [],
                        "citations": [],
                        "processing_time_seconds": 0,
                        "conversation_id": conv_id,
                        "sources": []
                    }
                    results.append(error_result)

            return results

        except Exception as e:
            logging.getLogger(__name__).exception("Model-level prediction failure")
            message = f"Model error: {str(e)}"
            # Build independent dicts so callers mutating one row do not affect others.
            return [{"error": message} for _ in range(len(model_input))]

print("✅ Enhanced ICC RAG Model for MLflow 3.0 defined")


In [None]:
# Register the Enhanced ICC RAG Model in MLflow 3.0
#
# This cell logs the pyfunc wrapper inside a new run (with an input example,
# an inferred signature, the Databricks resources it depends on and its pip
# requirements) and then registers the logged model under a fixed name.
with mlflow.start_run(run_name="Enhanced_ICC_RAG_Production") as run:
    
    # Create model instance
    production_model = EnhancedICCRAGModel()
    
    # Input example for serving endpoint
    input_example = pd.DataFrame({
        "query": [
            "Can you please go through all the ICTY trial judgments and identify where the chamber discusses the status of an individual during the conflict?",
            "What factors did the Trial Chamber rely on to assess active participation in hostilities?"
        ],
        "num_results": [10, 12],
        "conversation_id": ["legal_research_001", "legal_research_001"]
    })
    
    # Expected output format (used only to infer the model signature)
    output_example = [
        {
            "question": "Sample legal question",
            "analysis": "Comprehensive legal analysis based on retrieved context...",
            "routing_decision": "judgment",
            "sources_used": 8,
            "confidence_score": 0.85,
            "key_findings": ["Key legal finding 1", "Key legal finding 2"],
            "citations": ["Article 8", "Page 123", "Section A"],
            "processing_time_seconds": 5.2,
            "conversation_id": "legal_research_001",
            "sources": [
                {
                    "source": "ICTY_Judgment_001.pdf",
                    "source_type": "judgment",
                    "section": "FINDINGS_OF_FACT",
                    "page_number": 123,
                    "article": None,
                    "relevance_score": 0.95
                }
            ]
        }
    ]
    
    # Log the model using MLflow 3.0 syntax
    mlflow.pyfunc.log_model(
        name="enhanced_icc_rag_model",
        python_model=production_model,
        input_example=input_example,
        signature=infer_signature(input_example, output_example),
        # Declared so the serving endpoint is granted access to these resources.
        resources=[
            DatabricksVectorSearchIndex(index_name=PAST_JUDGMENTS_INDEX),
            DatabricksVectorSearchIndex(index_name=GENEVA_DOCUMENTATION_INDEX),
            DatabricksServingEndpoint(endpoint_name=LLM_MODEL_ENDPOINT)
        ],
        # The notebook imports `langchain_community`, `databricks.vector_search`
        # and `databricks.sdk` at the top; list their distributions explicitly
        # so the serving environment does not rely on transitive installs.
        pip_requirements=[
            "mlflow>=3.1.1",
            "langchain",
            "langchain-community",
            "databricks-langchain",
            "databricks-vectorsearch",
            "databricks-sdk",
            "numpy",
            "pandas",
            "pydantic"
        ]
    )
    
    # Register model in Unity Catalog
    # NOTE(review): Unity Catalog model names are normally three-level
    # (`catalog.schema.model`); confirm this one-level name resolves against
    # the intended registry.
    model_uri = f"runs:/{run.info.run_id}/enhanced_icc_rag_model"
    registered_model = mlflow.register_model(
        model_uri=model_uri,
        name="enhanced_icc_rag_legal_research"
    )
    
    print(f"✅ Model logged: {run.info.run_id}")
    print(f"🔗 Model URI: {model_uri}")
    print(f"📦 Model registered: {registered_model.name} v{registered_model.version}")
    print(f"🌐 View in Unity Catalog: https://dbc-0619d7f5-0bda.cloud.databricks.com/explore/data/models/{registered_model.name}/version/{registered_model.version}")


## Usage Examples & Deployment Instructions


In [None]:
def show_usage_examples():
    """Print the usage guide for the Enhanced ICC RAG System.

    Emits a banner followed by six numbered sections (local usage,
    conversational usage, serving endpoint, batch processing, deployment
    steps and recommended configuration). Each section is a heading line
    followed by a literal, copy-pasteable snippet. Returns nothing.
    """
    local_usage = """
# Initialize the system
rag_system = EnhancedICCRAGSystem()

# Simple legal research query
question = "What are the elements of crimes against humanity?"
context = rag_system.retrieve_context(question, top_k=8)
analysis = rag_system.generate_legal_analysis(question, context)

print(f"Analysis: {analysis.analysis}")
print(f"Confidence: {analysis.confidence_score}")
print(f"Sources: {len(analysis.sources_used)}")
"""

    conversational_usage = """
# Multi-turn conversation
conversation_id = "legal_research_session_001"

# First question
question1 = "How has the principle of proportionality been applied in ICTY judgments?"
context1 = rag_system.retrieve_context(question1, top_k=10)
analysis1 = rag_system.generate_legal_analysis(question1, context1, conversation_id)

# Follow-up question (with memory)
question2 = "What factors did the chamber consider in those cases?"
context2 = rag_system.retrieve_context(question2, top_k=8)
analysis2 = rag_system.generate_legal_analysis(question2, context2, conversation_id)
"""

    serving_usage = """
# Deploy to serving endpoint
import requests

endpoint_url = "https://your-workspace.cloud.databricks.com/serving-endpoints/enhanced-icc-rag/invocations"
headers = {"Authorization": "Bearer YOUR_TOKEN"}

# Single query
payload = {
    "dataframe_split": {
        "columns": ["query", "num_results", "conversation_id"],
        "data": [["What are the requirements for combatant status?", 10, "session_001"]]
    }
}

response = requests.post(endpoint_url, headers=headers, json=payload)
result = response.json()["predictions"][0]

print(f"Analysis: {result['analysis']}")
print(f"Routing: {result['routing_decision']}")
print(f"Sources: {result['sources_used']}")
print(f"Confidence: {result['confidence_score']}")
"""

    batch_usage = """
# Multiple legal research questions
batch_payload = {
    "dataframe_split": {
        "columns": ["query", "num_results", "conversation_id"],
        "data": [
            ["Can you identify all ICTY judgments discussing civilian status?", 12, "batch_001"],
            ["What factors determine active participation in hostilities?", 10, "batch_001"],
            ["How do chambers assess subjective vs objective criteria?", 8, "batch_001"]
        ]
    }
}

response = requests.post(endpoint_url, headers=headers, json=batch_payload)
results = response.json()["predictions"]

for i, result in enumerate(results, 1):
    print(f"Question {i}: {result['routing_decision']} routing, {result['sources_used']} sources")
"""

    deployment_steps = """
# Step 1: Create serving endpoint
# Go to Databricks UI > Serving > Create Endpoint
# Select the registered model: enhanced_icc_rag_legal_research
# Configure compute and scaling

# Step 2: Test endpoint
# Use the test queries provided above
# Monitor performance and adjust scaling as needed

# Step 3: Integration
# Integrate with your legal research workflow
# Use conversation_id for multi-turn research sessions
# Monitor confidence scores for quality assurance
"""

    optimal_configuration = """
# Query types and recommended num_results:
# - Complex legal research: 10-15
# - Specific case law queries: 8-12  
# - Geneva Convention queries: 6-10
# - Factual questions: 4-8

# Routing decisions:
# - "judgment": ICTY/ICC case law queries
# - "geneva": International humanitarian law queries  
# - "both": Comparative legal analysis

# Confidence scores:
# - >0.8: High confidence, reliable analysis
# - 0.6-0.8: Good confidence, review recommended
# - <0.6: Low confidence, additional research needed
"""

    # Ordered (heading, snippet) pairs; printed in exactly this sequence.
    guide_sections = (
        ("\n📋 1. LOCAL USAGE:", local_usage),
        ("\n📋 2. CONVERSATIONAL USAGE:", conversational_usage),
        ("\n📋 3. SERVING ENDPOINT USAGE:", serving_usage),
        ("\n📋 4. BATCH PROCESSING:", batch_usage),
        ("\n📋 5. DEPLOYMENT INSTRUCTIONS:", deployment_steps),
        ("\n📋 6. OPTIMAL CONFIGURATION:", optimal_configuration),
    )

    # Banner and separator rule.
    print("🚀 ENHANCED ICC RAG SYSTEM - USAGE EXAMPLES", "=" * 60, sep="\n")

    for heading, snippet in guide_sections:
        print(heading, snippet, sep="\n")


# Show usage examples
show_usage_examples()
