In [2]:
# Fast Code Repository Search using Ripgrep
import subprocess
import re
import os
import pandas as pd
from IPython.display import display
import time

# Configuration - MODIFY THESE VARIABLES

# Root directory of the repository to search, e.g., "/home/user/erpnext".
REPO_PATH = r"C:\python\erpnext"
# Term to search for.
SEARCH_TERM = "Task"

# Optional filters

# Glob restricting which files are searched, e.g. "*.py";
# set to None or "" to search all files.
FILE_PATTERN = "*.py"
# Set to True for a case-sensitive search.
CASE_SENSITIVE = False

def search_with_ripgrep(repo_path, search_term, file_pattern=None, case_sensitive=False):
    """Count matches of a term per file with ripgrep and rank the files.

    Runs ``rg --count`` with ``repo_path`` as the child's working directory
    and ``.`` as the search root, so returned paths are relative to
    ``repo_path``. The current working directory of this process is never
    changed.

    Args:
        repo_path: Directory to search.
        search_term: Pattern passed to ripgrep.
        file_pattern: Optional glob passed with ``-g``; a falsy value
            (``None`` or ``""``) searches all files.
        case_sensitive: When false, ``-i`` is added to the command.

    Returns:
        A ``(results, search_time)`` tuple. ``results`` is a list of
        ``(file_path, count)`` tuples sorted by count, highest first;
        ``search_time`` is the elapsed time of the ripgrep call in seconds.

    Raises:
        FileNotFoundError: If ``repo_path`` does not exist, or the ``rg``
            executable cannot be launched.
        NotADirectoryError: If ``repo_path`` exists but is not a directory.
        RuntimeError: If ripgrep exits with a status other than 0 or 1.
    """
    # Validate up front so a bad path is not mistaken for a missing `rg` binary.
    if not os.path.exists(repo_path):
        raise FileNotFoundError(f"Repository path does not exist: {repo_path}")
    if not os.path.isdir(repo_path):
        raise NotADirectoryError(f"Repository path is not a directory: {repo_path}")

    cmd = ["rg", "--count"]
    if not case_sensitive:
        cmd.append("-i")
    if file_pattern:
        cmd.extend(["-g", file_pattern])
    # "--" ends option parsing, so a search term that starts with "-" is
    # treated as the pattern instead of a command-line flag.
    cmd.extend(["--", search_term, "."])

    # perf_counter is monotonic, unlike time.time(), so the measured interval
    # cannot be skewed by system clock adjustments.
    start_time = time.perf_counter()
    result = subprocess.run(
        cmd,
        cwd=repo_path,  # scope the directory to the child; no os.chdir needed
        capture_output=True,
        text=True,
        # Decode explicitly instead of using the locale code page, which can
        # fail on non-ASCII paths (assumes rg emits UTF-8 on a pipe).
        encoding="utf-8",
        errors="replace",
    )
    search_time = time.perf_counter() - start_time

    # Exit status 0 means matches were found, 1 means no matches.
    if result.returncode not in (0, 1):
        raise RuntimeError(f"Ripgrep error: {result.stderr}")

    results = []
    for line in result.stdout.splitlines():
        # Each line is "path/to/file:count"; split on the last colon because
        # the path itself may contain colons.
        file_path, sep, count_str = line.rpartition(":")
        if not sep:
            continue  # blank line or no separator: nothing to record
        try:
            count = int(count_str)
        except ValueError:
            print(f"Warning: Couldn't parse count from {line}")
            continue
        results.append((file_path, count))

    # Rank files by number of matches, highest first.
    results.sort(key=lambda item: item[1], reverse=True)
    return results, search_time

# Make sure the ripgrep executable can be launched before going any further;
# a missing binary surfaces as FileNotFoundError from subprocess.
try:
    subprocess.run(["rg", "--version"], capture_output=True)
except FileNotFoundError:
    print("❌ Ripgrep not found! Please install it: https://github.com/BurntSushi/ripgrep#installation")
    raise SystemExit("Ripgrep is required for this script.")
else:
    print("✅ Ripgrep detected!")

# Check that the repository path is an existing directory. Stop here if it is
# not: the search below cannot run without it and would otherwise fail with a
# less helpful error.
if not os.path.isdir(REPO_PATH):
    print(f"❌ Repository path '{REPO_PATH}' does not exist or is not a directory. Please update the REPO_PATH variable.")
    raise SystemExit("A valid repository path is required for this script.")
print(f"✅ Repository path: {REPO_PATH}")

# Run the search using the configured settings
print(f"🔍 Searching for '{SEARCH_TERM}' in {FILE_PATTERN or 'all files'}...")
results, search_time = search_with_ripgrep(
    REPO_PATH,
    SEARCH_TERM,
    FILE_PATTERN,
    CASE_SENSITIVE,
)

# Summarize the outcome, then list every matching file with its count
print(f"\n✨ Found {len(results)} files containing '{SEARCH_TERM}' in {search_time:.2f} seconds")
if not results:
    print("No results found.")
else:
    print("\nRanked results (format: path/to/file count):")
    for file_path, count in results:
        print(f"{file_path} {count}")

    # Tabular view of the same ranking (requires pandas)
    df = pd.DataFrame(results, columns=['File Path', 'Occurrences'])
    print("\nDataFrame representation:")
    print(df)

✅ Ripgrep detected!
✅ Repository path: C:\python\erpnext
🔍 Searching for 'Task' in *.py...

✨ Found 57 files containing 'Task' in 0.25 seconds

Ranked results (format: path/to/file count):
.\erpnext\projects\doctype\project\test_project.py 106
.\erpnext\projects\doctype\task\task.py 99
.\erpnext\projects\doctype\project\project.py 77
.\erpnext\projects\doctype\task\test_task.py 63
.\erpnext\setup\doctype\transaction_deletion_record\transaction_deletion_record.py 45
.\erpnext\assets\doctype\asset_maintenance\asset_maintenance.py 36
.\erpnext\patches\v13_0\update_project_template_tasks.py 21
.\erpnext\projects\report\project_summary\project_summary.py 19
.\erpnext\projects\report\delayed_tasks_summary\delayed_tasks_summary.py 18
.\erpnext\templates\pages\projects.py 16
.\erpnext\projects\report\delayed_tasks_summary\test_delayed_tasks_summary.py 15
.\erpnext\projects\doctype\project_template\project_template.py 15
.\erpnext\projects\doctype\timesheet\timesheet.py 13
.\erpnext\projects\do