In [6]:
from pathlib import Path
from sentence_transformers import SentenceTransformer
import numpy as np
import re

  from .autonotebook import tqdm as notebook_tqdm


In [1]:
from pathlib import Path
import re

class SimpleKeywordFinder:
    """Find Markdown files in a vault that contain a keyword.

    The search is a plain, case-insensitive substring match over every
    ``*.md`` file found recursively under ``vault_path``.
    """

    def __init__(self, vault_path):
        """Remember the vault root and echo it to the console.

        Args:
            vault_path: Directory (str or path-like) that is searched
                recursively for ``.md`` files.
        """
        self.vault_path = Path(vault_path)
        print(f"üìÅ Vault path: {vault_path}")

    def search_keyword(self, keyword):
        """Scan every ``.md`` file under the vault for ``keyword``.

        Args:
            keyword: Text to look for; compared case-insensitively.

        Returns:
            A list with one dict per matching file, holding the keys
            ``file_path``, ``file_name``, ``folder``, ``context`` and
            ``file_size`` (length of the decoded text in characters).
            The list is empty when nothing matches or when the vault has
            no ``.md`` files. Files that cannot be read are reported on
            the console and skipped.
        """
        print(f"\nüîç Searching for keyword: '{keyword}'")

        # Get all .md files
        md_files = list(self.vault_path.rglob("*.md"))
        print(f"üìö Scanning {len(md_files)} markdown files...")

        if not md_files:
            print("‚ùå No .md files found in vault!")
            return []

        needle = keyword.lower()  # loop-invariant, so lower-case it once
        matching_files = []

        for processed, file_path in enumerate(md_files, 1):
            if processed % 100 == 0:  # Progress for large vaults
                print(f"   Processed {processed}/{len(md_files)} files...")

            # Only the read can reasonably fail; keep the handler narrow in scope.
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except Exception as e:
                print(f"‚ö†Ô∏è  Error reading {file_path}: {e}")
                continue

            # Simple case-insensitive search
            if needle in content.lower():
                matching_files.append({
                    'file_path': str(file_path),
                    'file_name': file_path.name,
                    'folder': str(file_path.parent),
                    'context': self.get_keyword_context(content, keyword),
                    'file_size': len(content)
                })

        print(f"‚úÖ Search complete! Found {len(matching_files)} files containing '{keyword}'")
        return matching_files

    def get_keyword_context(self, content, keyword):
        """Build a short preview of the lines surrounding the keyword.

        For each of the first two lines that contain ``keyword``
        (case-insensitively), the line before it, the line itself and the
        line after it are collected as one snippet.

        Args:
            content: Full text of a file.
            keyword: Text to locate inside ``content``.

        Returns:
            The snippets joined by ``"\\n...\\n"``, or the string
            ``"No context found"`` when no single line contains the keyword.
        """
        needle = keyword.lower()
        lines = content.split('\n')
        context_lines = []

        for i, line in enumerate(lines):
            if needle in line.lower():
                # One line of context on each side, clamped to the file bounds
                start = max(0, i - 1)
                end = min(len(lines), i + 2)
                context_lines.append('\n'.join(lines[start:end]))

                if len(context_lines) >= 2:  # Limit context snippets
                    break

        return '\n...\n'.join(context_lines) if context_lines else "No context found"

    def interactive_search(self):
        """Run the console loop: prompt, search, list hits, show a file.

        The loop ends when the user types ``quit``, ``exit`` or ``stop`` as
        the keyword, or answers ``q`` after viewing a file.
        """
        while True:
            print("\n" + "="*60)
            print("üîç KEYWORD FILE FINDER")
            print("Find .md files containing your keyword")
            print("="*60)

            keyword = input("\nüí≠ Enter keyword to search: ").strip()

            if not keyword:
                print("‚ùå Please enter a keyword")
                continue

            if keyword.lower() in ['quit', 'exit', 'stop']:
                print("üëã Goodbye!")
                break

            # Search for keyword
            matching_files = self.search_keyword(keyword)

            if not matching_files:
                print(f"‚ùå No files found containing '{keyword}'")
                continue

            # Show results
            print(f"\nüìã FOUND {len(matching_files)} FILES:")
            print("-" * 80)

            for i, file_info in enumerate(matching_files, 1):
                print(f"\n{i}. {file_info['file_name']}")
                print(f"   üìÇ Folder: {file_info['folder']}")
                print(f"   üìè Size: {file_info['file_size']} chars")
                print(f"   üìù Context:")
                print(f"      {file_info['context'][:200]}...")

            # Let user choose
            print(f"\nüéØ SELECT A FILE:")
            choice = input(f"Enter number (1-{len(matching_files)}) or 'new' for new search: ").strip()

            if choice.lower() == 'new':
                continue

            # isdecimal() rather than isdigit(): isdigit() also accepts characters
            # such as superscript digits, which int() refuses to parse.
            if choice.isdecimal() and 1 <= int(choice) <= len(matching_files):
                selected_file = matching_files[int(choice) - 1]
                # show_full_file() returns False when the user picked "(q)uit";
                # honour that instead of silently starting another search.
                if not self.show_full_file(selected_file):
                    print("üëã Goodbye!")
                    break
            else:
                print("‚ùå Invalid choice")

    def show_full_file(self, file_info):
        """Print the whole file and ask the user what to do next.

        Args:
            file_info: One of the dicts produced by ``search_keyword``; the
                keys ``file_name`` and ``file_path`` are read.

        Returns:
            ``False`` when the user answers ``q`` (quit), ``True`` for any
            other answer.
        """
        print(f"\nüìñ FULL CONTENT: {file_info['file_name']}")
        print("="*100)

        try:
            with open(file_info['file_path'], 'r', encoding='utf-8') as f:
                content = f.read()
                print(content)
        except Exception as e:
            print(f"‚ùå Error reading file: {e}")

        print("="*100)

        # Ask what to do next
        action = input("\nWhat next? (s)earch again, (q)uit: ").strip().lower()
        if action == 'q':
            return False  # Signal to quit
        return True  # Continue

In [3]:

# Script entry point: ask for the vault location, validate it, then start
# the interactive keyword search.
if __name__ == "__main__":
    print("üß† Simple Keyword File Finder")
    print("Perfect for large Joplin vaults!")

    vault_path = input("\nEnter path to your vault: ").strip()

    # Raise SystemExit directly instead of calling exit(): exit() is a helper
    # injected by the `site` module for interactive shells and is not meant to
    # be used from program code. A non-zero status marks the run as failed.
    if not vault_path:
        print("‚ùå Please provide a valid path")
        raise SystemExit(1)

    if not Path(vault_path).exists():
        print(f"‚ùå Path {vault_path} doesn't exist")
        raise SystemExit(1)

    finder = SimpleKeywordFinder(vault_path)
    finder.interactive_search()


üß† Simple Keyword File Finder
Perfect for large Joplin vaults!
üìÅ Vault path: D:\LOST.DIR\Obsidian Vault

üîç KEYWORD FILE FINDER
Find .md files containing your keyword

üîç Searching for keyword: 'query tranformation'
üìö Scanning 227 markdown files...
   Processed 100/227 files...
   Processed 200/227 files...
‚úÖ Search complete! Found 0 files containing 'query tranformation'
‚ùå No files found containing 'query tranformation'

üîç KEYWORD FILE FINDER
Find .md files containing your keyword

üîç Searching for keyword: 'query transformation'
üìö Scanning 227 markdown files...
   Processed 100/227 files...
   Processed 200/227 files...
‚úÖ Search complete! Found 3 files containing 'query transformation'

üìã FOUND 3 FILES:
--------------------------------------------------------------------------------

1. Rag(Query Transformation ).md
   üìÇ Folder: D:\LOST.DIR\Obsidian Vault\11. RAG
   üìè Size: 7122 chars
   üìù Context:
      ---
title: Rag(Query Transformation )
upda

In [4]:
from pathlib import Path
import re
from typing import List
import difflib

class EnhancedKeywordFinder:
    """Keyword search over a vault of Markdown files with spelling fallback.

    On construction every ``*.md`` file under ``vault_path`` is listed and a
    vocabulary is built from a sample of them. When a search finds nothing,
    the keyword is matched against that vocabulary with ``difflib`` and the
    closest word is searched for instead.
    """

    # Number of files (taken from the front of ``all_files``) whose words
    # feed the spell-check vocabulary.
    VOCAB_SAMPLE_SIZE = 50

    def __init__(self, vault_path):
        """List the vault's ``.md`` files and build the vocabulary.

        Args:
            vault_path: Directory (str or path-like) that is searched
                recursively for ``.md`` files.
        """
        self.vault_path = Path(vault_path)
        print(f"üìÅ Vault path: {vault_path}")

        # Load all files and extract common words for spell checking
        self.all_files = list(self.vault_path.rglob("*.md"))
        self.common_words = set()
        self._build_vocabulary()

    def _build_vocabulary(self):
        """Fill ``self.common_words`` from the first sampled files.

        Words are runs of three or more ASCII letters taken from the
        lower-cased file text. Files that cannot be opened or decoded are
        skipped silently.
        """
        print("üîß Building vocabulary for spell checking...")
        for file_path in self.all_files[:self.VOCAB_SAMPLE_SIZE]:
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read().lower()
            except (OSError, UnicodeDecodeError):
                # Best effort: an unreadable file just contributes no words.
                # (A bare ``except`` would also swallow KeyboardInterrupt.)
                continue
            # Extract words (3+ characters)
            self.common_words.update(re.findall(r'\b[a-zA-Z]{3,}\b', content))
        print(f"‚úÖ Built vocabulary with {len(self.common_words)} words")

    def spell_check_keyword(self, keyword):
        """Suggest a vocabulary word that closely resembles ``keyword``.

        Args:
            keyword: The text the user typed.

        Returns:
            The best ``difflib`` match (lower-case) when one exists and
            differs from the lower-cased keyword; otherwise ``keyword``
            unchanged, in its original case.
        """
        if not self.common_words:
            return keyword

        lowered = keyword.lower()

        # Get close matches
        suggestions = difflib.get_close_matches(
            lowered,
            self.common_words,
            n=3,
            cutoff=0.6
        )

        if suggestions and suggestions[0] != lowered:
            print(f"üî§ Did you mean: {', '.join(suggestions[:3])}?")
            return suggestions[0]

        return keyword

    def search_keyword(self, keyword, auto_correct=True):
        """Search every listed file for ``keyword``.

        Args:
            keyword: Text to look for; compared case-insensitively.
            auto_correct: When true and nothing matches, retry once with
                the spelling suggested by ``spell_check_keyword``.

        Returns:
            A list with one dict per matching file, holding the keys
            ``file_path``, ``file_name``, ``folder``, ``context`` and
            ``file_size``. Empty when nothing matches. Files that cannot be
            read are reported on the console and skipped.
        """
        print(f"\nüîç Searching for keyword: '{keyword}'")

        if not self.all_files:
            print("‚ùå No .md files found in vault!")
            return []

        needle = keyword.lower()  # loop-invariant, so lower-case it once
        matching_files = []

        for processed, file_path in enumerate(self.all_files, 1):
            if processed % 100 == 0:
                print(f"   Processed {processed}/{len(self.all_files)} files...")

            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except Exception as e:
                print(f"‚ö†Ô∏è  Error reading {file_path}: {e}")
                continue

            # Case-insensitive substring search
            if needle in content.lower():
                matching_files.append({
                    'file_path': str(file_path),
                    'file_name': file_path.name,
                    'folder': str(file_path.parent),
                    'context': self.get_keyword_context(content, keyword),
                    'file_size': len(content)
                })

        # If no matches and auto-correct enabled, try spell correction
        if not matching_files and auto_correct:
            corrected_keyword = self.spell_check_keyword(keyword)
            # Compare case-insensitively: spell_check_keyword hands back the
            # keyword in its ORIGINAL case when it has no suggestion, and that
            # must not be mistaken for a correction (it would rescan the whole
            # vault for the very same text).
            if corrected_keyword.lower() != needle:
                print(f"üîÑ Trying corrected spelling: '{corrected_keyword}'")
                return self.search_keyword(corrected_keyword, auto_correct=False)

        if matching_files:
            print(f"‚úÖ Found {len(matching_files)} files containing '{keyword}'")
        else:
            print(f"‚ùå No files found containing '{keyword}'")

        return matching_files

    def get_keyword_context(self, content, keyword):
        """Build a short preview of the lines surrounding the keyword.

        For each of the first two lines that contain ``keyword``
        (case-insensitively), the line before it, the line itself and the
        line after it are collected as one snippet.

        Args:
            content: Full text of a file.
            keyword: Text to locate inside ``content``.

        Returns:
            The snippets joined by ``"\\n...\\n"``, or the string
            ``"No context found"`` when no single line contains the keyword.
        """
        needle = keyword.lower()
        lines = content.split('\n')
        context_lines = []

        for i, line in enumerate(lines):
            if needle in line.lower():
                # One line of context on each side, clamped to the file bounds
                start = max(0, i - 1)
                end = min(len(lines), i + 2)
                context_lines.append('\n'.join(lines[start:end]))

                if len(context_lines) >= 2:  # Limit context snippets
                    break

        return '\n...\n'.join(context_lines) if context_lines else "No context found"

    def display_search_results(self, matching_files):
        """Print a numbered summary of the search hits.

        Args:
            matching_files: Result list from ``search_keyword``. Nothing is
                printed when it is empty.
        """
        if not matching_files:
            return

        print(f"\nüìã FOUND {len(matching_files)} FILES:")
        print("-" * 80)

        for i, file_info in enumerate(matching_files, 1):
            print(f"\n{i}. {file_info['file_name']}")
            print(f"   üìÇ Folder: {file_info['folder']}")
            print(f"   üìè Size: {file_info['file_size']:,} chars")
            print(f"   üìù Context Preview:")

            # Show context with limited lines (3 lines, 100 characters each)
            context_lines = file_info['context'].split('\n')[:3]
            for line in context_lines:
                print(f"      {line[:100]}{'...' if len(line) > 100 else ''}")

    def select_multiple_files(self, matching_files):
        """Prompt until the user picks at least one of the listed files.

        Accepted input: a single number, comma-separated numbers, ``a-b``
        ranges (inclusive, 1-based), any mix of those, or ``all``.

        Args:
            matching_files: Result list from ``search_keyword``.

        Returns:
            The chosen entries in the order they were first named, each at
            most once. Numbers outside ``1..len(matching_files)`` are
            ignored. Returns ``[]`` immediately when ``matching_files`` is
            empty, and the list itself for ``all``.
        """
        if not matching_files:
            return []

        print(f"\nüéØ SELECT FILES (Multiple Selection Supported):")
        print("Examples:")
        print("  ‚Ä¢ Single: 1")
        print("  ‚Ä¢ Multiple: 1,3,5")
        print("  ‚Ä¢ Range: 1-3")
        print("  ‚Ä¢ All: all")

        while True:
            choice = input(f"\nYour selection (1-{len(matching_files)}): ").strip()

            if not choice:
                print("‚ùå Please make a selection")
                continue

            if choice.lower() == 'all':
                return matching_files

            try:
                selected_indices = []

                # Handle comma-separated values
                for part in choice.split(','):
                    part = part.strip()

                    # Handle ranges (e.g., "1-3")
                    if '-' in part:
                        start, end = map(int, part.split('-'))
                        selected_indices.extend(range(start - 1, end))
                    else:
                        selected_indices.append(int(part) - 1)
            except ValueError:
                print("‚ùå Invalid format. Use numbers, commas, or ranges (e.g., 1,3,5 or 1-3)")
                continue

            # Keep in-range indices only; dict.fromkeys drops repeats while
            # preserving first-seen order, so "1-3,2" does not list file 2 twice.
            valid_indices = [
                i for i in dict.fromkeys(selected_indices)
                if 0 <= i < len(matching_files)
            ]

            if not valid_indices:
                print("‚ùå No valid selections made")
                continue

            selected_files = [matching_files[i] for i in valid_indices]
            print(f"‚úÖ Selected {len(selected_files)} files")
            return selected_files

    def view_selected_files(self, selected_files):
        """Print the full text of each selected file, pausing between files.

        Args:
            selected_files: Entries from ``search_keyword``; the keys
                ``file_name`` and ``file_path`` are read. A read failure is
                reported on the console and the next file is shown.
        """
        if not selected_files:
            print("‚ùå No files selected")
            return

        print(f"\nüìö VIEWING {len(selected_files)} SELECTED FILES:")
        print("=" * 100)

        for i, file_info in enumerate(selected_files, 1):
            print(f"\nüìñ FILE {i}/{len(selected_files)}: {file_info['file_name']}")
            print(f"üìÇ Path: {file_info['file_path']}")
            print("-" * 100)

            try:
                with open(file_info['file_path'], 'r', encoding='utf-8') as f:
                    content = f.read()
                    print(content)
            except Exception as e:
                print(f"‚ùå Error reading file: {e}")

            print("-" * 100)

            # Pause between files for large selections
            if i < len(selected_files):
                input(f"\nPress Enter to view next file ({i+1}/{len(selected_files)})...")

        print("\n‚úÖ Finished viewing all selected files")

    def interactive_search(self):
        """Run the console loop: prompt, search, select, then act.

        The loop ends when the user types ``quit``, ``exit``, ``stop`` or
        ``q`` as the keyword, or chooses action ``3``.
        """
        while True:
            print("\n" + "="*80)
            print("üîç ENHANCED KEYWORD FILE FINDER")
            print("‚ú® Features: Multi-select ‚Ä¢ View multiple files ‚Ä¢ Spell checking")
            print("="*80)

            keyword = input("\nüí≠ Enter keyword to search: ").strip()

            if not keyword:
                print("‚ùå Please enter a keyword")
                continue

            if keyword.lower() in ['quit', 'exit', 'stop', 'q']:
                print("üëã Goodbye!")
                break

            # Search for files
            matching_files = self.search_keyword(keyword)

            if not matching_files:
                continue

            # Display results
            self.display_search_results(matching_files)

            # Let user select multiple files
            selected_files = self.select_multiple_files(matching_files)

            if not selected_files:
                continue

            # Ask what to do with selected files
            print(f"\nüéØ What would you like to do with {len(selected_files)} selected files?")
            print("1. View file contents")
            print("2. Search for different keyword")
            print("3. Quit")

            action = input("Choose action (1-3): ").strip()

            if action == '1':
                self.view_selected_files(selected_files)
            elif action == '2':
                continue
            elif action == '3':
                print("üëã Goodbye!")
                break
            else:
                print("‚ùå Invalid choice, returning to search...")

# Script entry point: ask for the vault location, validate it, then start
# the interactive multi-select keyword search.
if __name__ == "__main__":
    print("üß† Enhanced Multi-Select Keyword Finder")
    print("Perfect for large Joplin vaults with spell checking!")

    vault_path = input("\nEnter path to your vault: ").strip()

    # Raise SystemExit directly instead of calling exit(): exit() is a helper
    # injected by the `site` module for interactive shells and is not meant to
    # be used from program code. A non-zero status marks the run as failed.
    if not vault_path:
        print("‚ùå Please provide a valid path")
        raise SystemExit(1)

    if not Path(vault_path).exists():
        print(f"‚ùå Path {vault_path} doesn't exist")
        raise SystemExit(1)

    finder = EnhancedKeywordFinder(vault_path)
    finder.interactive_search()

üß† Enhanced Multi-Select Keyword Finder
Perfect for large Joplin vaults with spell checking!
üìÅ Vault path: D:\LOST.DIR\Obsidian Vault
üîß Building vocabulary for spell checking...
‚úÖ Built vocabulary with 2984 words

üîç ENHANCED KEYWORD FILE FINDER
‚ú® Features: Multi-select ‚Ä¢ View multiple files ‚Ä¢ Spell checking

üîç Searching for keyword: 'agets'
   Processed 100/227 files...
   Processed 200/227 files...
üî§ Did you mean: agents, gets, agent?
üîÑ Trying corrected spelling: 'agents'

üîç Searching for keyword: 'agents'
   Processed 100/227 files...
   Processed 200/227 files...
‚úÖ Found 20 files containing 'agents'

üìã FOUND 20 FILES:
--------------------------------------------------------------------------------

1. 1. E-commerce website shittt.md
   üìÇ Folder: D:\LOST.DIR\Obsidian Vault\1. Projects\1. Big Data Project Ideas
   üìè Size: 5,635 chars
   üìù Context Preview:
      
      - Customers expect **instant replies** from chatbots or human agents.
   