In [3]:
import os
import json
import pickle
import re
from collections import defaultdict, Counter
from typing import Set, List, Dict, Tuple

class BooleanRetrievalSystem:
    def __init__(self, corpus_folder: str):
        """
        Initialize Boolean Retrieval System
        
        Args:
            corpus_folder: Path to the cleaned_corpus folder
        """
        self.corpus_folder = corpus_folder
        self.index_folder = os.path.join(corpus_folder, "boolean_index")
        
        # Create index folder if it doesn't exist
        if not os.path.exists(self.index_folder):
            os.makedirs(self.index_folder)
        
        # Data structures
        self.inverted_index = defaultdict(set)  # term -> set of doc_ids
        self.documents = {}  # doc_id -> document info
        self.term_frequencies = defaultdict(Counter)  # doc_id -> term -> frequency
        self.doc_lengths = {}  # doc_id -> number of tokens
        
        # Statistics
        self.stats = {
            'total_documents': 0,
            'total_terms': 0,
            'vocabulary_size': 0
        }
        
        # Query parsing cache
        self.query_cache = {}
        
        # Document name to ID mapping for easier lookup
        self.doc_name_to_id = {}
    
    def build_index(self):
        """Build inverted index from corpus"""
        print("üî® Building Boolean Retrieval Index...")
        
        # First, check what files are available
        print(f"üìÅ Checking corpus folder: {self.corpus_folder}")
        
        # Look for document tokens file in different locations
        possible_token_files = [
            os.path.join(self.corpus_folder, "document_tokens.json"),
            os.path.join(self.corpus_folder, "..", "document_tokens.json"),
            os.path.join(self.corpus_folder, "statistics", "document_tokens.json"),
        ]
        
        doc_tokens_file = None
        for file_path in possible_token_files:
            if os.path.exists(file_path):
                doc_tokens_file = file_path
                print(f"‚úÖ Found document tokens: {file_path}")
                break
        
        if not doc_tokens_file:
            print("‚ùå document_tokens.json not found!")
            print("Available files in corpus folder:")
            for root, dirs, files in os.walk(self.corpus_folder):
                for file in files[:20]:  # Show first 20 files
                    print(f"  ‚Ä¢ {os.path.join(root, file)}")
                if len(files) > 20:
                    print(f"  ... and {len(files) - 20} more files")
            return False
        
        # Load document tokens
        try:
            with open(doc_tokens_file, 'r', encoding='utf-8') as f:
                doc_data = json.load(f)
        except Exception as e:
            print(f"‚ùå Error loading document tokens: {e}")
            return False
        
        # Process each document
        doc_id = 0
        for doc_name, doc_info in doc_data.items():
            tokens = doc_info.get('tokens', [])
            token_count = doc_info.get('token_count', 0)
            
            if tokens and token_count > 0:
                doc_id += 1
                doc_key = f"doc_{doc_id:04d}"  # Format as doc_0001, doc_0002, etc.
                
                # Store document info
                self.documents[doc_key] = {
                    'name': doc_name,
                    'token_count': token_count
                }
                
                # Store name to ID mapping for quick lookup
                self.doc_name_to_id[doc_name] = doc_key
                
                # Build inverted index and term frequencies
                term_counter = Counter(tokens)
                self.term_frequencies[doc_key] = term_counter
                self.doc_lengths[doc_key] = token_count
                
                # Add to inverted index
                for term in term_counter:
                    self.inverted_index[term].add(doc_key)
        
        # Update statistics
        self.stats['total_documents'] = len(self.documents)
        self.stats['vocabulary_size'] = len(self.inverted_index)
        self.stats['total_terms'] = sum(self.doc_lengths.values())
        
        # Save index
        self.save_index()
        
        print(f"\n‚úÖ Index built successfully!")
        print(f"   Documents: {self.stats['total_documents']:,}")
        print(f"   Vocabulary: {self.stats['vocabulary_size']:,}")
        print(f"   Total terms: {self.stats['total_terms']:,}")
        
        # Show sample terms
        sample_terms = list(self.inverted_index.keys())[:10]
        print(f"   Sample terms: {', '.join(sample_terms)}...")
        
        return True
    
    def save_index(self):
        """Save index to disk"""
        index_data = {
            'inverted_index': {k: list(v) for k, v in self.inverted_index.items()},
            'documents': self.documents,
            'term_frequencies': {k: dict(v) for k, v in self.term_frequencies.items()},
            'doc_lengths': self.doc_lengths,
            'stats': self.stats,
            'doc_name_to_id': self.doc_name_to_id
        }
        
        index_file = os.path.join(self.index_folder, "boolean_index.pkl")
        with open(index_file, 'wb') as f:
            pickle.dump(index_data, f)
        
        print(f"\nüíæ Index saved to: {index_file}")
    
    def load_index(self):
        """Load index from disk"""
        index_file = os.path.join(self.index_folder, "boolean_index.pkl")
        
        if not os.path.exists(index_file):
            print("‚ùå Index not found. Building index...")
            return self.build_index()
        
        try:
            with open(index_file, 'rb') as f:
                index_data = pickle.load(f)
            
            self.inverted_index = defaultdict(set)
            for k, v in index_data['inverted_index'].items():
                self.inverted_index[k] = set(v)
            
            self.documents = index_data['documents']
            self.term_frequencies = defaultdict(Counter)
            for k, v in index_data['term_frequencies'].items():
                self.term_frequencies[k] = Counter(v)
            
            self.doc_lengths = index_data['doc_lengths']
            self.stats = index_data['stats']
            self.doc_name_to_id = index_data.get('doc_name_to_id', {})
            
            print(f"‚úÖ Index loaded successfully!")
            print(f"   Documents: {self.stats['total_documents']:,}")
            print(f"   Vocabulary: {self.stats['vocabulary_size']:,}")
            
            return True
        except Exception as e:
            print(f"‚ùå Error loading index: {e}")
            print("Building fresh index...")
            return self.build_index()
    
    def boolean_and(self, term1_docs: Set[str], term2_docs: Set[str]) -> Set[str]:
        """Boolean AND operation (intersection)"""
        if not term1_docs or not term2_docs:
            return set()
        return term1_docs.intersection(term2_docs)
    
    def boolean_or(self, term1_docs: Set[str], term2_docs: Set[str]) -> Set[str]:
        """Boolean OR operation (union)"""
        if not term1_docs:
            return term2_docs.copy() if term2_docs else set()
        if not term2_docs:
            return term1_docs.copy() if term1_docs else set()
        return term1_docs.union(term2_docs)
    
    def boolean_not(self, term_docs: Set[str]) -> Set[str]:
        """Boolean NOT operation (complement)"""
        all_docs = set(self.documents.keys())
        return all_docs.difference(term_docs)
    
    def get_docs_for_term(self, term: str) -> Set[str]:
        """Get documents containing a term"""
        term_lower = term.lower().strip()
        if term_lower in self.inverted_index:
            return self.inverted_index[term_lower].copy()
        return set()
    
    def simple_parse_query(self, query: str) -> Tuple[str, Set[str]]:
        """
        Simple query parser for boolean operations
        Supports: AND, OR, NOT, parentheses
        """
        # Clean query
        query = query.strip().lower()
        
        # Check cache
        if query in self.query_cache:
            return self.query_cache[query]
        
        # Handle parentheses first
        if '(' in query and ')' in query:
            # Find innermost parentheses
            start = query.find('(')
            end = query.rfind(')')
            if start < end:
                inner_query = query[start+1:end]
                # Parse inner query
                inner_op, inner_result = self.simple_parse_query(inner_query)
                
                # Replace parentheses with placeholder
                placeholder = f"__result_{len(inner_result)}__"
                new_query = query[:start] + placeholder + query[end+1:]
                
                # Parse the new query
                return self.simple_parse_query(new_query)
        
        # Handle NOT operations
        if query.startswith('not '):
            term = query[4:].strip()
            docs = self.get_docs_for_term(term)
            result = self.boolean_not(docs)
            self.query_cache[query] = ("NOT", result)
            return "NOT", result
        
        # Handle AND operations
        if ' and ' in query:
            parts = [p.strip() for p in query.split(' and ')]
            result = None
            for part in parts:
                if part.startswith('not '):
                    # Handle AND NOT
                    term = part[4:].strip()
                    term_docs = self.get_docs_for_term(term)
                    not_docs = self.boolean_not(term_docs)
                    if result is None:
                        result = not_docs
                    else:
                        result = self.boolean_and(result, not_docs)
                else:
                    term_docs = self.get_docs_for_term(part)
                    if result is None:
                        result = term_docs
                    else:
                        result = self.boolean_and(result, term_docs)
            self.query_cache[query] = ("AND", result if result else set())
            return "AND", result if result else set()
        
        # Handle OR operations
        if ' or ' in query:
            parts = [p.strip() for p in query.split(' or ')]
            result = set()
            for part in parts:
                if part.startswith('not '):
                    term = part[4:].strip()
                    term_docs = self.get_docs_for_term(term)
                    not_docs = self.boolean_not(term_docs)
                    result = self.boolean_or(result, not_docs)
                else:
                    term_docs = self.get_docs_for_term(part)
                    result = self.boolean_or(result, term_docs)
            self.query_cache[query] = ("OR", result)
            return "OR", result
        
        # Single term query
        docs = self.get_docs_for_term(query)
        self.query_cache[query] = ("TERM", docs)
        return "TERM", docs
    
    def search(self, query: str) -> List[Dict]:
        """
        Execute boolean search query
        
        Returns:
            List of document information for matching documents
        """
        if not query or not query.strip():
            print("‚ùå Empty query")
            return []
        
        operation, result_docs = self.simple_parse_query(query)
        
        print(f"\nüìä Query: '{query}'")
        print(f"   Operation: {operation}")
        print(f"   Documents found: {len(result_docs)}")
        
        if not result_docs:
            return []
        
        # Convert to list of document info
        results = []
        for doc_id in result_docs:
            doc_info = self.documents.get(doc_id)
            if doc_info:
                results.append({
                    'doc_id': doc_id,
                    'name': doc_info['name'],
                    'token_count': doc_info['token_count']
                })
        
        # Sort by document name
        results.sort(key=lambda x: x['name'])
        
        return results
    
    def show_document_text(self, doc_name: str, preview_lines: int = 10):
        """Show text of a specific document"""
        # First, check if we have cleaned_docs folder
        cleaned_docs_folder = os.path.join(self.corpus_folder, "cleaned_docs")
        
        if not os.path.exists(cleaned_docs_folder):
            # Try alternative locations
            possible_locations = [
                os.path.join(self.corpus_folder, "..", "cleaned_docs"),
                os.path.join(self.corpus_folder, "..", "..", "cleaned_corpus", "cleaned_docs"),
                os.path.join(os.path.dirname(self.corpus_folder), "cleaned_docs"),
            ]
            
            for location in possible_locations:
                if os.path.exists(location):
                    cleaned_docs_folder = location
                    break
        
        doc_path = os.path.join(cleaned_docs_folder, doc_name)
        
        if not os.path.exists(doc_path):
            # Try to find the document in the original text folder
            print(f"‚ùå Document not found in cleaned_docs: {doc_name}")
            print("Looking for document in alternative locations...")
            
            # Try original text folder
            original_folder = r"C:\Users\Armaghan Rafique\Desktop\AI Project\supreme_court_judgements_txt"
            if os.path.exists(original_folder):
                original_path = os.path.join(original_folder, doc_name)
                if os.path.exists(original_path):
                    doc_path = original_path
                    print(f"‚úÖ Found document in original folder")
                else:
                    print(f"‚ùå Document not found in: {original_folder}")
                    return
            else:
                print(f"‚ùå Original folder not found: {original_folder}")
                return
        
        try:
            with open(doc_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
            
            print(f"\n" + "=" * 80)
            print(f"üìÑ DOCUMENT PREVIEW: {doc_name}")
            print("=" * 80)
            
            # Show metadata if present
            if content.startswith('='):
                # Extract metadata
                lines = content.split('\n')
                meta_end = 0
                for i, line in enumerate(lines):
                    if 'TEXT CONTENT:' in line or 'TEXT:' in line:
                        meta_end = i + 1
                        break
                
                if meta_end > 0:
                    print("\nüìã METADATA:")
                    for line in lines[:min(meta_end, 8)]:
                        if line.strip():
                            print(f"  {line}")
            
            # Show actual text content
            print("\nüìù TEXT CONTENT (first {} lines):".format(preview_lines))
            print("-" * 80)
            
            # Split into lines and show content
            lines = content.split('\n')
            text_start = 0
            for i, line in enumerate(lines):
                if 'TEXT CONTENT:' in line or 'TEXT:' in line:
                    text_start = i + 2  # Skip the separator line
                    break
            
            lines_to_show = lines[text_start:text_start + preview_lines]
            for i, line in enumerate(lines_to_show):
                if line.strip():
                    # Clean and truncate line
                    clean_line = re.sub(r'\s+', ' ', line.strip())
                    if len(clean_line) > 120:
                        print(f"{i+1:3d}. {clean_line[:117]}...")
                    else:
                        print(f"{i+1:3d}. {clean_line}")
            
            if len(lines) > text_start + preview_lines:
                print(f"\n... and {len(lines) - (text_start + preview_lines)} more lines")
            
            print("=" * 80)
            
        except Exception as e:
            print(f"‚ùå Error reading document: {e}")
    
    def get_term_statistics(self, term: str):
        """Get statistics for a specific term"""
        term_lower = term.lower()
        
        if term_lower not in self.inverted_index:
            print(f"\n‚ùå Term '{term}' not found in vocabulary")
            print(f"   Similar terms: {[t for t in self.inverted_index.keys() if term_lower in t][:5]}")
            return
        
        docs = self.inverted_index[term_lower]
        total_freq = 0
        
        print(f"\nüìä STATISTICS FOR TERM: '{term}'")
        print("=" * 50)
        print(f"   Document frequency: {len(docs):,} documents")
        
        # Calculate total frequency across all documents
        for doc_id in docs:
            total_freq += self.term_frequencies[doc_id].get(term_lower, 0)
        
        print(f"   Total frequency: {total_freq:,} occurrences")
        print(f"   Average per document: {total_freq/len(docs):.1f} occurrences")
        
        # Show top documents containing the term
        print(f"\nüìÑ TOP DOCUMENTS CONTAINING '{term}':")
        doc_freqs = []
        for doc_id in docs:
            freq = self.term_frequencies[doc_id].get(term_lower, 0)
            doc_name = self.documents[doc_id]['name']
            doc_freqs.append((doc_name, freq, doc_id))
        
        # Sort by frequency (descending)
        doc_freqs.sort(key=lambda x: x[1], reverse=True)
        
        for i, (doc_name, freq, doc_id) in enumerate(doc_freqs[:10], 1):
            doc_info = self.documents[doc_id]
            print(f"   {i:2d}. {doc_name}")
            print(f"       Frequency: {freq} | Total tokens: {doc_info['token_count']:,}")
        
        if len(doc_freqs) > 10:
            print(f"   ... and {len(doc_freqs) - 10} more documents")
        
        print("=" * 50)
    
    def find_documents_with_term(self, term: str, limit: int = 20):
        """Find documents containing a specific term"""
        term_lower = term.lower()
        
        if term_lower not in self.inverted_index:
            print(f"‚ùå Term '{term}' not found")
            return []
        
        docs = list(self.inverted_index[term_lower])
        print(f"\nüîç Found {len(docs)} documents containing '{term}':")
        
        results = []
        for i, doc_id in enumerate(docs[:limit], 1):
            doc_info = self.documents[doc_id]
            freq = self.term_frequencies[doc_id].get(term_lower, 0)
            print(f"  {i:3d}. {doc_info['name']} ({freq} occurrences)")
            results.append({
                'name': doc_info['name'],
                'frequency': freq,
                'token_count': doc_info['token_count']
            })
        
        if len(docs) > limit:
            print(f"  ... and {len(docs) - limit} more documents")
        
        return results
    
    def interactive_search(self):
        """Interactive search interface"""
        print("\n" + "=" * 80)
        print("üîç BOOLEAN RETRIEVAL SYSTEM - INTERACTIVE SEARCH")
        print("=" * 80)
        print("\nüìã Available Commands:")
        print("  ‚Ä¢ search <query>      - Search for documents")
        print("  ‚Ä¢ stats <term>        - Get statistics for a term")
        print("  ‚Ä¢ find <term>         - Find documents containing a term")
        print("  ‚Ä¢ preview <doc_name>  - Preview a specific document")
        print("  ‚Ä¢ terms               - Show most common terms")
        print("  ‚Ä¢ quit                - Exit the program")
        print("\nüìù Query Examples:")
        print("  ‚Ä¢ evidence")
        print("  ‚Ä¢ murder AND evidence")
        print("  ‚Ä¢ murder OR homicide")
        print("  ‚Ä¢ murder AND NOT evidence")
        print("  ‚Ä¢ (murder OR killing) AND weapon")
        print("=" * 80)
        
        while True:
            user_input = input("\nüéØ Enter command: ").strip()
            
            if not user_input:
                continue
            
            if user_input.lower() == 'quit':
                print("üëã Goodbye!")
                break
            
            elif user_input.lower() == 'terms':
                # Show most common terms
                common_terms = []
                for term, docs in self.inverted_index.items():
                    total_freq = 0
                    for doc_id in docs:
                        total_freq += self.term_frequencies[doc_id].get(term, 0)
                    common_terms.append((term, len(docs), total_freq))
                
                # Sort by document frequency
                common_terms.sort(key=lambda x: x[1], reverse=True)
                
                print(f"\nüìä TOP 20 MOST COMMON TERMS:")
                print("=" * 60)
                print(f"{'Term':<20} {'Docs':<10} {'Total Freq':<12}")
                print("-" * 60)
                for term, doc_count, total_freq in common_terms[:20]:
                    print(f"{term:<20} {doc_count:<10,} {total_freq:<12,}")
                print("=" * 60)
            
            elif user_input.lower().startswith('search '):
                query = user_input[7:].strip()
                if query:
                    results = self.search(query)
                    if results:
                        print(f"\n‚úÖ Found {len(results)} document(s):")
                        for i, doc in enumerate(results, 1):
                            print(f"  {i:3d}. {doc['name']} ({doc['token_count']:,} tokens)")
                        
                        # Ask for preview
                        if results:
                            preview_choice = input("\nüìñ Preview a document? (enter number or 'n'): ").strip()
                            if preview_choice.lower() != 'n' and preview_choice.isdigit():
                                idx = int(preview_choice) - 1
                                if 0 <= idx < len(results):
                                    self.show_document_text(results[idx]['name'])
                    else:
                        print("‚ùå No documents found")
                else:
                    print("‚ùå Please enter a search query")
            
            elif user_input.lower().startswith('stats '):
                term = user_input[6:].strip()
                if term:
                    self.get_term_statistics(term)
                else:
                    print("‚ùå Please enter a term")
            
            elif user_input.lower().startswith('find '):
                term = user_input[5:].strip()
                if term:
                    self.find_documents_with_term(term)
                else:
                    print("‚ùå Please enter a term")
            
            elif user_input.lower().startswith('preview '):
                doc_name = user_input[8:].strip()
                if doc_name:
                    self.show_document_text(doc_name)
                else:
                    print("‚ùå Please enter a document name")
            
            else:
                # Try as a search query
                results = self.search(user_input)
                if results:
                    print(f"\n‚úÖ Found {len(results)} document(s):")
                    for i, doc in enumerate(results[:20], 1):
                        print(f"  {i:3d}. {doc['name']} ({doc['token_count']:,} tokens)")
                    if len(results) > 20:
                        print(f"  ... and {len(results) - 20} more documents")
                else:
                    print(f"‚ùå No results found for: {user_input}")
                    print("   Type 'help' to see available commands")
    
    def test_queries(self):
        """Test various boolean queries"""
        test_queries = [
            ("Single term", "evidence"),
            ("AND operation", "murder AND evidence"),
            ("OR operation", "murder OR homicide"),
            ("NOT operation", "NOT murder"),
            ("Complex query", "(murder OR homicide) AND evidence"),
        ]
        
        print("\nüß™ TESTING BOOLEAN QUERIES")
        print("=" * 80)
        
        for test_name, query in test_queries:
            print(f"\nüìã Test: {test_name}")
            print(f"   Query: {query}")
            
            results = self.search(query)
            
            if results:
                print(f"   ‚úÖ Found {len(results)} documents")
                print(f"   Sample documents:")
                for doc in results[:3]:
                    print(f"     ‚Ä¢ {doc['name']} ({doc['token_count']:,} tokens)")
                if len(results) > 3:
                    print(f"     ... and {len(results) - 3} more")
            else:
                print(f"   ‚ùå No documents found")
            
            print("-" * 40)

def find_corpus_folder(candidate_paths=None):
    """Locate a folder that contains ``document_tokens.json``.

    Each candidate directory is checked in order.  If the tokens file sits
    directly inside it, the candidate itself is returned; otherwise its
    subdirectories are walked and the first one holding the file is
    returned.  Candidates that do not exist, or that are not directories,
    are skipped.

    Args:
        candidate_paths: Optional list of directories to try.  When omitted,
            the built-in project locations are used.

    Returns:
        The path of the folder containing ``document_tokens.json``, or None
        when no candidate qualifies.
    """
    if candidate_paths is None:
        candidate_paths = [
            # Primary location
            r"C:\Users\Armaghan Rafique\Desktop\AI Project\cleaned_corpus",
            # Alternative locations
            r"C:\Users\Armaghan Rafique\Desktop\AI Project\supreme_court_judgements_txt\cleaned_corpus",
            # If cleaned_corpus doesn't exist, try the text folder
            r"C:\Users\Armaghan Rafique\Desktop\AI Project\supreme_court_judgements_txt",
        ]

    for path in candidate_paths:
        # isdir rather than exists: a plain file cannot be a corpus folder.
        if not os.path.isdir(path):
            continue

        print(f"‚úÖ Found corpus folder: {path}")

        # Check if it has the necessary structure
        if os.path.exists(os.path.join(path, "document_tokens.json")):
            return path

        # Check subdirectories
        for root, _dirs, files in os.walk(path):
            if "document_tokens.json" in files:
                print(f"‚úÖ Found document_tokens.json in: {root}")
                return root

    print("‚ùå Could not find corpus folder automatically")
    return None

def main():
    """Command-line entry point: locate the corpus, load the index, run the menu.

    The corpus folder is auto-detected, falling back to a path typed by the
    user.  After the index is loaded (or built), a four-option menu is shown;
    every option ends in the interactive search loop.
    """
    banner = "=" * 80
    print(banner)
    print("‚öñÔ∏è  SUPREME COURT - BOOLEAN RETRIEVAL SYSTEM")
    print(banner)

    # Auto-detect first; only ask the user when that fails.
    corpus_folder = find_corpus_folder()
    if not corpus_folder:
        corpus_folder = input("\nüìÅ Enter corpus folder path: ").strip()
        if not os.path.exists(corpus_folder):
            print(f"‚ùå Folder does not exist: {corpus_folder}")
            return

    print(f"\nüìÅ Using corpus folder: {corpus_folder}")

    retrieval_system = BooleanRetrievalSystem(corpus_folder)

    index_ready = retrieval_system.load_index()
    if not index_ready:
        print("‚ùå Failed to initialize retrieval system")
        return

    print(f"\nüìä SYSTEM READY")
    print(f"   Documents: {retrieval_system.stats['total_documents']:,}")
    print(f"   Vocabulary: {retrieval_system.stats['vocabulary_size']:,}")
    print(f"   Total terms: {retrieval_system.stats['total_terms']:,}")

    print("\n" + banner)
    for menu_line in (
        "üéØ What would you like to do?",
        "  1. Test predefined queries",
        "  2. Start interactive search",
        "  3. Find documents for a specific term",
        "  4. Get statistics for a term",
    ):
        print(menu_line)

    choice = input("\nSelect option (1-4): ").strip()

    # Options 3 and 4 differ only in their prompt and the method they call.
    term_actions = {
        '3': ("Enter term to find documents: ", retrieval_system.find_documents_with_term),
        '4': ("Enter term for statistics: ", retrieval_system.get_term_statistics),
    }

    if choice == '1':
        retrieval_system.test_queries()
    elif choice in term_actions:
        term_prompt, action = term_actions[choice]
        term = input(term_prompt).strip()
        if term:
            action(term)

    # Every path, including option 2 and unrecognised input, ends here.
    retrieval_system.interactive_search()

# Start the command-line program only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

‚öñÔ∏è  SUPREME COURT - BOOLEAN RETRIEVAL SYSTEM
‚úÖ Found corpus folder: C:\Users\Armaghan Rafique\Desktop\AI Project\cleaned_corpus

üìÅ Using corpus folder: C:\Users\Armaghan Rafique\Desktop\AI Project\cleaned_corpus
‚úÖ Index loaded successfully!
   Documents: 1,460
   Vocabulary: 15,240

üìä SYSTEM READY
   Documents: 1,460
   Vocabulary: 15,240
   Total terms: 1,048,901

üéØ What would you like to do?
  1. Test predefined queries
  2. Start interactive search
  3. Find documents for a specific term
  4. Get statistics for a term



Select option (1-4):  2



üîç BOOLEAN RETRIEVAL SYSTEM - INTERACTIVE SEARCH

üìã Available Commands:
  ‚Ä¢ search <query>      - Search for documents
  ‚Ä¢ stats <term>        - Get statistics for a term
  ‚Ä¢ find <term>         - Find documents containing a term
  ‚Ä¢ preview <doc_name>  - Preview a specific document
  ‚Ä¢ terms               - Show most common terms
  ‚Ä¢ quit                - Exit the program

üìù Query Examples:
  ‚Ä¢ evidence
  ‚Ä¢ murder AND evidence
  ‚Ä¢ murder OR homicide
  ‚Ä¢ murder AND NOT evidence
  ‚Ä¢ (murder OR killing) AND weapon



üéØ Enter command:  murder



üìä Query: 'murder'
   Operation: TERM
   Documents found: 96

‚úÖ Found 96 document(s):
    1. 2025LHC7277.txt (1,051 tokens)
    2. 2025LHC7389.txt (6,103 tokens)
    3. 2025LHC7437.txt (6,891 tokens)
    4. 2025LHC7508.txt (5,160 tokens)
    5. 2025LHC7583.txt (4,272 tokens)
    6. 2025LHC7631.txt (10,326 tokens)
    7. Ateeq_Hussain_Versus_The_State_The_State_Versus_S_Ateeq_hussain_vs_the_state.txt (2,008 tokens)
    8. Bulbul_Aman_Shah_Versus_The_State_Bulbul_20Aman_20Shah_20versus_20The_20State.txt (1,083 tokens)
    9. Civil_Appeal_No022018_in_CrPLA_No_342017_The_20State_20versus_20Muhammad_20Nadeem.txt (649 tokens)
   10. CrA_No_062013__8_._Cr._A_No._6-2013_Naheed_Akhtar_v._The_State.txt (2,998 tokens)
   11. CrAppeal_No022011_in_CrPLA_No152010_The_20State_20Versus_20Muhammad_20Afzal.txt (5,445 tokens)
   12. CrAppeal_No022011_in_CrPLA_No152010_The_Stat_Muhammad_20Afzal_20Versus_20The_20State.txt (5,443 tokens)
   13. CrPLANo292015_Sufiyan_20versus_20the_20STate.txt (73 token


üéØ Enter command:  exit



üìä Query: 'exit'
   Operation: TERM
   Documents found: 1

‚úÖ Found 1 document(s):
    1. PLD2016Sindh238.txt (5,040 tokens)



üéØ Enter command:  end



üìä Query: 'end'
   Operation: TERM
   Documents found: 215

‚úÖ Found 215 document(s):
    1. 1_Tajuddin_2_Gul_Alam_sons_of_Mir_Alam_VERSUS_M__5_._20tajuddin_20etc_20vs_20Mst_20Zainab.txt (760 tokens)
    2. 2025LHC7336.txt (1,139 tokens)
    3. Abdu-Rahim_Shah_Versus_Provincial_Government_oth_Abdur-Rahim_20Shah_20versus_20the_20Prov_20Govt.txt (417 tokens)
    4. Abdul_Bari_Versus_Government_of_GB_others_Abdul_20Bari_20versus_20Government_20of_20GB.txt (217 tokens)
    5. Abdul_Bari_Vs_Provincial_Governmet_Abdul_Bari_Vs_Provincial_Govt.txt (278 tokens)
    6. Abdul_Wahid_Versus_The_State_Cr.Misc._No._07-210.txt (251 tokens)
    7. Abdur-ur-Rahim_versus_The_State_abdur-ur-rahim_vs_the_state.txt (500 tokens)
    8. Abdur-ur-rahim_vs_The_state_abdur-ur-rahim_20vs_20the_20state.txt (500 tokens)
    9. All_Gilgit-Baltistan_workers_federation_all_gilgit_baltistan_workers_federation.txt (4,452 tokens)
   10. All_Residents_of_Fultux_Versus_Muhammad_Ali_judgement_20of_20all_20residents_20of


üéØ Enter command:  

üéØ Enter command:  

üéØ Enter command:  quit


üëã Goodbye!
