# In [2]:
import base64
import html
import io
import json
import os
import re
import subprocess

import ipywidgets as widgets
import pandas as pd
from IPython.display import HTML, display

class RipgrepExplorer:
    """Notebook widget front end for searching a directory tree with ripgrep.

    The class builds a small ipywidgets form (directory, pattern, options),
    runs ``rg --json`` when the Search button is clicked, and renders the
    matches (plus any requested context lines) as HTML grouped by file.

    ``search_with_direct_command`` is a lower-level escape hatch that runs
    ``rg`` with caller-supplied options and prints its plain-text output.
    """

    # How many matches are rendered per file before the rest are summarised.
    MATCHES_SHOWN_PER_FILE = 10

    # ripgrep's -E/--encoding takes WHATWG encoding labels.  Every dropdown
    # entry is already a valid label except Python's spelling 'latin-1'.
    _RG_ENCODING_LABELS = {'latin-1': 'latin1'}

    def __init__(self):
        """Probe for the ``rg`` executable, set exclusion defaults, build the UI."""
        # Check if ripgrep is installed.  OSError covers FileNotFoundError as
        # well as e.g. PermissionError on a non-executable ``rg``.
        try:
            subprocess.run(
                ['rg', '--version'],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=True,
            )
            self.ripgrep_available = True
        except (subprocess.SubprocessError, OSError):
            self.ripgrep_available = False

        # File extensions that are never searched (images, archives, media, fonts...).
        self.exclude_extensions = {
            'jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp', 'tiff', 'svg', 'ico', 'heic', 'avif',
            'exe', 'bin', 'dll', 'zip', 'tar', 'gz', 'rar', 'mp3', 'wav', 'aac',
            'mp4', 'avi', 'mov', 'mkv', 'tmp', 'bak', 'ttf', 'otf', 'woff', 'woff2', 'eot',
        }

        # Directory names that are never descended into, at any depth.
        self.exclude_directories = {
            '_locales', '_metadata', 'sample_csv', '.git', '.github',
            '__pycache__', '.ipynb_checkpoints', 'node_modules',
        }

        # Initialize UI components
        self.setup_ui()

    def setup_ui(self):
        """Create all input widgets and output areas (nothing is displayed yet)."""
        # Directory selection
        self.dir_path_input = widgets.Text(
            value=os.getcwd(),
            placeholder='Enter directory path',
            description='Directory:',
            layout=widgets.Layout(width='60%')
        )

        # Search input
        self.search_input = widgets.Text(
            value='',
            placeholder='Search pattern (regex supported)',
            description='Search:',
            layout=widgets.Layout(width='60%')
        )

        # Options
        self.case_sensitive = widgets.Checkbox(
            value=False,
            description='Case sensitive',
            layout=widgets.Layout(width='150px')
        )

        self.whole_word = widgets.Checkbox(
            value=False,
            description='Whole words',
            layout=widgets.Layout(width='150px')
        )

        self.search_hidden = widgets.Checkbox(
            value=False,
            description='Include hidden files',
            layout=widgets.Layout(width='200px')
        )

        self.file_pattern = widgets.Text(
            value='',
            placeholder='File pattern (e.g., *.py)',
            description='File type:',
            layout=widgets.Layout(width='40%')
        )

        # Encoding of the files being searched (forwarded to ``rg -E``)
        self.encoding_dropdown = widgets.Dropdown(
            options=['utf-8', 'latin-1', 'cp1252', 'iso-8859-1', 'utf-16'],
            value='utf-8',
            description='Encoding:',
            layout=widgets.Layout(width='40%')
        )

        # Max results
        self.max_results = widgets.IntSlider(
            value=100,
            min=10,
            max=1000,
            step=10,
            description='Max results:',
            layout=widgets.Layout(width='40%')
        )

        # Context lines
        self.context_lines = widgets.IntSlider(
            value=2,
            min=0,
            max=10,
            step=1,
            description='Context lines:',
            layout=widgets.Layout(width='40%')
        )

        # Search button
        self.search_button = widgets.Button(
            description='Search',
            button_style='primary',
            tooltip='Run ripgrep search',
            layout=widgets.Layout(width='100px')
        )
        self.search_button.on_click(self.on_search_clicked)

        # Status message
        self.status_output = widgets.Output()

        # Results output
        self.results_output = widgets.Output()

    def display_ui(self):
        """Render the form, or installation hints when ``rg`` is unavailable."""
        if not self.ripgrep_available:
            # Emit the hints as ONE HTML fragment: splitting a <ul> across
            # several display() calls leaves each fragment with unbalanced tags.
            display(HTML(
                "<h3>⚠️ Ripgrep not found</h3>"
                "<p>Please install ripgrep (<code>rg</code>) to use this tool:</p>"
                "<ul>"
                "<li>Ubuntu/Debian: <code>sudo apt install ripgrep</code></li>"
                "<li>macOS: <code>brew install ripgrep</code></li>"
                "<li>Windows: <code>choco install ripgrep</code> or "
                "<code>scoop install ripgrep</code></li>"
                "</ul>"
            ))
            return

        # Main layout
        display(HTML("<h2>Ripgrep File Search</h2>"))

        # Directory and search inputs
        display(widgets.HBox([self.dir_path_input]))
        display(widgets.HBox([self.search_input]))

        # Options row
        display(widgets.HBox([
            self.case_sensitive,
            self.whole_word,
            self.search_hidden
        ]))

        # File pattern and options
        display(widgets.HBox([self.file_pattern, self.encoding_dropdown]))
        display(widgets.HBox([self.max_results, self.context_lines]))

        # Search button and status
        display(widgets.HBox([self.search_button]))
        display(self.status_output)
        display(self.results_output)

        # Display instructions
        with self.status_output:
            print("Ready to search. Enter a search pattern and click 'Search'.")
            print("If you encounter encoding errors, try different encodings from the dropdown.")

    def on_search_clicked(self, b):
        """Button callback: validate the inputs, then run the search.

        Args:
            b: The button instance passed by ipywidgets (unused).
        """
        self.status_output.clear_output()
        self.results_output.clear_output()

        search_term = self.search_input.value.strip()
        directory = self.dir_path_input.value.strip()

        with self.status_output:
            if not search_term:
                print("Please enter a search term.")
                return

            if not directory:
                print("Please enter a directory path.")
                return

            if not os.path.exists(directory):
                print(f"Directory '{directory}' does not exist.")
                return

            if not os.path.isdir(directory):
                print(f"'{directory}' is not a directory.")
                return

            print(f"Searching for '{search_term}' in {directory}...")

            # Top-level UI boundary: report any failure instead of letting the
            # traceback disappear inside the widget callback.
            try:
                self.run_ripgrep_search(search_term, directory)
            except Exception as e:
                print(f"Error during search: {str(e)}")
                print("\nTry changing the encoding in the dropdown menu above.")

    def build_ripgrep_command(self, search_term, directory):
        """Translate the current widget state into an ``rg`` argument list.

        Args:
            search_term (str): Regex pattern to search for.
            directory (str): Root directory of the search.

        Returns:
            list[str]: Argument vector suitable for ``subprocess`` (no shell).
        """
        cmd = ['rg']

        # Basic options
        if not self.case_sensitive.value:
            cmd.append('-i')  # Case insensitive

        if self.whole_word.value:
            cmd.append('-w')  # Whole word

        # ripgrep skips hidden files unless --hidden is given, so the checkbox
        # must add that flag explicitly when ticked.
        cmd.append('--hidden' if self.search_hidden.value else '--no-hidden')

        # Add context lines
        if self.context_lines.value > 0:
            cmd.extend(['-C', str(self.context_lines.value)])

        # Tell ripgrep how to decode the files.  For 'utf-8' nothing is passed
        # so ripgrep's own default encoding handling stays in effect.
        encoding = self.encoding_dropdown.value
        if encoding and encoding != 'utf-8':
            cmd.extend(['-E', self._RG_ENCODING_LABELS.get(encoding, encoding)])

        # Add file type pattern if specified
        file_glob = self.file_pattern.value.strip()
        if file_glob:
            cmd.extend(['-g', file_glob])

        # Exclude file extensions (sorted so the command line is reproducible)
        for ext in sorted(self.exclude_extensions):
            cmd.extend(['-g', f'!*.{ext}'])

        # Exclude directories at ANY depth.  A glob such as '!name/*' contains
        # a slash and is therefore anchored to the search root only.
        for dir_name in sorted(self.exclude_directories):
            cmd.extend(['-g', f'!**/{dir_name}/**'])

        # Add output formatting
        cmd.append('--json')  # JSON output for parsing

        # --max-count limits matching lines PER FILE; the overall cap is
        # enforced while parsing the output in run_ripgrep_search().
        cmd.extend(['--max-count', str(self.max_results.value)])

        # '--' ends option parsing so a pattern starting with '-' is not
        # mistaken for a flag.
        cmd.extend(['--', search_term, directory])

        return cmd

    @staticmethod
    def _parse_json_lines(lines, max_matches):
        """Parse ripgrep JSON-lines output, keeping at most ``max_matches`` matches.

        Args:
            lines: Iterable of decoded output lines.
            max_matches (int): Total number of ``match`` records to keep.

        Returns:
            tuple: ``(records, problems, truncated)`` where ``records`` is the
            list of parsed dict records up to the cap, ``problems`` is a list
            of ``"JSON Error: ..."`` strings for unparsable lines, and
            ``truncated`` is True when further matches were dropped.
        """
        records = []
        problems = []
        match_count = 0
        truncated = False

        for line in lines:
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                problems.append(f"JSON Error: {line}")
                continue
            if not isinstance(record, dict):
                problems.append(f"JSON Error: {line}")
                continue

            if record.get('type') == 'match':
                if match_count >= max_matches:
                    truncated = True
                    break
                match_count += 1
            records.append(record)

        return records, problems, truncated

    @staticmethod
    def _json_bytes(field):
        """Return the raw bytes of a ripgrep JSON "arbitrary data" object.

        ripgrep emits ``{"text": ...}`` for valid UTF-8 and
        ``{"bytes": <base64>}`` otherwise.  Returns None when neither form is
        present.  Raises ValueError on malformed base64.
        """
        if not isinstance(field, dict):
            return None
        text = field.get('text')
        if isinstance(text, str):
            return text.encode('utf-8', errors='surrogatepass')
        encoded = field.get('bytes')
        if isinstance(encoded, str):
            return base64.b64decode(encoded)
        return None

    @staticmethod
    def _highlight(raw, submatches):
        """Return HTML-escaped text for ``raw`` with every submatch marked.

        Args:
            raw (bytes): The matched line(s) as raw bytes.
            submatches (list[dict]): ripgrep submatch objects; their ``start``
                and ``end`` are BYTE offsets into ``raw``, which is why the
                slicing happens on bytes rather than on the decoded string.

        Returns:
            str: HTML fragment safe to embed inside ``<pre><code>``.
        """
        def escape(chunk):
            return html.escape(chunk.decode('utf-8', errors='replace'))

        pieces = []
        cursor = 0
        for sub in submatches:
            start, end = sub['start'], sub['end']
            # Skip empty, overlapping or out-of-range spans instead of failing.
            if not (cursor <= start < end <= len(raw)):
                continue
            pieces.append(escape(raw[cursor:start]))
            pieces.append(f"<mark><b>{escape(raw[start:end])}</b></mark>")
            cursor = end
        pieces.append(escape(raw[cursor:]))
        return ''.join(pieces)

    @classmethod
    def _group_by_file(cls, results):
        """Group ``match``/``context`` records by file path, preserving order.

        Records of other types, records without a usable path, and files that
        end up with no ``match`` record are dropped.

        Returns:
            dict[str, list[dict]]: path -> records in the order ripgrep emitted them.
        """
        grouped = {}
        for record in results:
            if not isinstance(record, dict):
                continue
            if record.get('type') not in ('match', 'context'):
                continue
            data = record.get('data')
            if not isinstance(data, dict):
                continue
            try:
                raw_path = cls._json_bytes(data.get('path'))
            except ValueError:
                continue  # malformed base64 path
            if raw_path is None:
                continue
            path = raw_path.decode('utf-8', errors='replace')
            grouped.setdefault(path, []).append(record)

        return {
            path: records
            for path, records in grouped.items()
            if any(r['type'] == 'match' for r in records)
        }

    @classmethod
    def _render_record(cls, record):
        """Render one ``match`` or ``context`` record as an HTML ``<pre>`` block.

        Raises:
            KeyError: If the record lacks ``line_number``, ``lines`` content,
                or a submatch lacks ``start``/``end``.
            ValueError: If base64 line content is malformed.
        """
        data = record['data']
        line_number = data['line_number']
        raw = cls._json_bytes(data['lines'])
        if raw is None:
            raise KeyError('lines')

        if record['type'] == 'match':
            body = cls._highlight(raw, data.get('submatches') or [])
            return f"<pre><code>Line {line_number}: {body}</code></pre>"

        # Context line: no highlighting, visually de-emphasised.
        body = html.escape(raw.decode('utf-8', errors='replace'))
        return f'<pre style="opacity: 0.6"><code>Line {line_number}: {body}</code></pre>'

    def run_ripgrep_search(self, search_term, directory):
        """Run ``rg --json`` for the current settings and display the outcome.

        Args:
            search_term (str): Regex pattern to search for.
            directory (str): Root directory of the search.
        """
        cmd = self.build_ripgrep_command(search_term, directory)

        with self.status_output:
            print(f"Running: {' '.join(cmd)}")

        # subprocess.run drains stdout and stderr together; reading one pipe to
        # EOF before the other can deadlock once the unread pipe fills up.
        completed = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        # ripgrep's JSON stream is always UTF-8 regardless of the encoding of
        # the searched files, so it must not be decoded with the dropdown value.
        # Split on '\n' only: str.splitlines() would also break on characters
        # such as U+2028 that may appear unescaped inside JSON strings.
        max_matches = self.max_results.value
        stdout_text = completed.stdout.decode('utf-8', errors='replace')
        results, error_output, truncated = self._parse_json_lines(
            stdout_text.split('\n'), max_matches
        )
        error_output.extend(
            completed.stderr.decode('utf-8', errors='replace').splitlines()
        )

        has_matches = any(record.get('type') == 'match' for record in results)

        # Display results
        with self.results_output:
            # Exit status 1 means "no matches".  Any other non-zero status is
            # an error, but matches found alongside it are still shown below.
            if completed.returncode not in (0, 1) and not has_matches:
                print(f"Error (return code {completed.returncode}):")
                for line in error_output:
                    print(line)
                return

            if error_output:
                print("Warnings:")
                for line in error_output[:5]:  # Show only first 5 warnings
                    print(line)
                if len(error_output) > 5:
                    print(f"...and {len(error_output) - 5} more warnings.")
                print("\n")

            # Test for actual matches: the stream can contain non-match records
            # (e.g. a summary) even when nothing matched.
            if not has_matches:
                print(f"No matches found for '{search_term}'")
                return

            if truncated:
                print(f"Stopped after {max_matches} matches; "
                      "narrow the search or raise 'Max results' to see more.")

            # Process and display results
            self.display_search_results(results)

    def display_search_results(self, results):
        """Render parsed ripgrep JSON records into the results output area.

        Args:
            results (list[dict]): Records as produced by ``rg --json``; only
                ``match`` and ``context`` records are rendered.
        """
        with self.results_output:
            file_records = self._group_by_file(results)
            match_counts = {
                path: sum(1 for r in records if r['type'] == 'match')
                for path, records in file_records.items()
            }

            # Display summary
            num_files = len(file_records)
            num_matches = sum(match_counts.values())
            display(HTML(f"<h3>Found {num_matches} matches in {num_files} files</h3>"))

            # Create table of files and match counts
            df = pd.DataFrame(
                [{"File": path, "Matches": count} for path, count in match_counts.items()]
            )
            display(df)

            # Display matches with HTML formatting.  Paths and file content are
            # escaped because they are arbitrary text from disk.
            limit = self.MATCHES_SHOWN_PER_FILE
            for path, records in file_records.items():
                total = match_counts[path]
                display(HTML(f"<h4>{html.escape(path)} ({total} matches)</h4>"))

                shown = 0
                for record in records:
                    if record['type'] == 'match':
                        if shown >= limit:
                            break
                        shown += 1
                    try:
                        display(HTML(self._render_record(record)))
                    except (KeyError, IndexError, TypeError, ValueError) as e:
                        # Report malformed records without aborting the listing.
                        display(HTML(
                            f"<p><i>Error displaying match: {html.escape(str(e))}</i></p>"
                        ))

                if total > limit:
                    display(HTML(f"<p><i>...and {total - limit} more matches</i></p>"))

                display(HTML("<hr>"))

    def search_with_direct_command(self, search_term=None, directory=None, options=None):
        """
        Execute a direct ripgrep command and display results.
        This is useful for running custom ripgrep commands with specific options.

        The command is ``rg <options...> <search_term> <directory>``; output is
        printed as plain text, decoded with the encoding selected in the
        dropdown.  No ``--`` separator is inserted because ``options`` may
        legitimately end in a flag that consumes the next argument.

        Args:
            search_term (str): The term to search for; defaults to the Search box.
            directory (str): Directory to search in; defaults to the Directory box.
            options (list): List of ripgrep command line options
        """
        if search_term is None:
            search_term = self.search_input.value

        if directory is None:
            directory = self.dir_path_input.value

        # Build the command: custom options first, then pattern and path.
        cmd = ['rg', *(options or []), search_term, directory]

        with self.status_output:
            print(f"Running direct command: {' '.join(cmd)}")

        try:
            # Run the command and capture output
            encoding = self.encoding_dropdown.value
            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                encoding=encoding,
                errors='replace'
            )

            # Display results
            with self.results_output:
                if result.stderr:
                    print("Errors/Warnings:")
                    print(result.stderr)
                    print("\n")

                if result.stdout:
                    print("Results:")
                    print(result.stdout)
                else:
                    print("No matches found.")

        except Exception as e:
            with self.status_output:
                print(f"Error executing command: {str(e)}")

# Create the explorer (this probes for the `rg` executable and builds the
# widgets) and render its form in the notebook cell output.
explorer = RipgrepExplorer()
explorer.display_ui()

# Usage example for direct command (more flexible for custom options):
# runs `rg <options> <term> <directory>` and prints ripgrep's plain-text output.
# explorer.search_with_direct_command("todo", ".", ["-i", "--no-hidden", "-C", "2"])

# --- Captured notebook cell output (widget reprs; not executable code) ---
# HBox(children=(Text(value='C:\\python\\script\\notebooks', description='Directory:', layout=Layout(width='60%'…
#
# HBox(children=(Text(value='', description='Search:', layout=Layout(width='60%'), placeholder='Search pattern (…
#
# HBox(children=(Checkbox(value=False, description='Case sensitive', layout=Layout(width='150px')), Checkbox(val…
#
# HBox(children=(Text(value='', description='File type:', layout=Layout(width='40%'), placeholder='File pattern …
#
# HBox(children=(IntSlider(value=100, description='Max results:', layout=Layout(width='40%'), max=1000, min=10, …
#
# HBox(children=(Button(button_style='primary', description='Search', layout=Layout(width='100px'), style=Button…
#
# Output()
#
# Output()