In [2]:
import os
import re
from pathlib import Path
import csv
from datetime import datetime


class CodeElementFinder:
    """Heuristic, regex-based extractor of declared names from source files.

    For every supported file extension a list of ``(compiled_regex, kind)``
    pairs is kept in ``self.patterns``; capture group 1 of each regex is the
    declared name.  The patterns are textual heuristics, not parsers: they
    also match inside comments and string literals, and the Python
    ``variable`` pattern also matches keyword arguments (``f(key=1)``).

    Attributes:
        patterns: dict mapping a lower-case extension (with leading dot) to
            its list of ``(pattern, element_type)`` pairs.
        valid_extensions: set of the extensions present in ``patterns``.
    """

    # Captured names that are keywords/literals rather than user identifiers.
    _SKIP_NAMES = frozenset(
        {'if', 'for', 'while', 'return', 'true', 'false', 'null', 'undefined'}
    )
    # Names shorter than this are dropped from the results.
    _MIN_NAME_LENGTH = 3

    def __init__(self):
        """Compile the per-extension pattern tables."""
        ident = r'([a-zA-Z_][a-zA-Z0-9_]*)'
        # Optional generic parameter list such as ``<T>`` or ``<K, V>``.
        generics = r'(?:\s*<[^<>{}()]*>)?'
        type_list = r'[\w$.]+(?:\s*,\s*[\w$.]+)*'
        # Optional ``<T> extends Base<T> implements A, B`` between name and ``{``
        # so that subclasses are reported just like the Python pattern does.
        class_heritage = (
            generics
            + r'(?:\s+extends\s+[\w$.]+' + generics + r')?'
            + r'(?:\s+implements\s+' + type_list + r')?'
        )
        brace_class = re.compile(r'\bclass\s+' + ident + class_heritage + r'\s*\{')
        ts_interface = re.compile(
            r'\binterface\s+' + ident + generics
            + r'(?:\s+extends\s+' + type_list + r')?\s*\{'
        )
        # C++: optional ``final`` and base-clause (``: public Base``) before ``{``.
        cpp_class = re.compile(
            r'\bclass\s+' + ident + r'(?:\s+final)?(?:\s*:[^{;]*)?\s*\{'
        )

        script_function = re.compile(r'\bfunction\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(')
        arrow_function = re.compile(r'\bconst\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*\(')
        js_variable = re.compile(r'\b(?:let|var|const)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=')
        typed_variable = re.compile(
            r'\b(?:let|var|const)\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*(?::|=)'
        )

        # ``=(?!=)`` keeps ``==`` comparisons from being reported as assignments.
        c_function = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(')
        c_variable = re.compile(
            r'\b[a-zA-Z_][a-zA-Z0-9_]*\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=(?!=)'
        )

        self.patterns = {
            '.py': [
                (re.compile(r'\bclass\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*(?:\(|:)'), 'class'),
                (re.compile(r'\bdef\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\('), 'function'),
                (re.compile(r'\b([a-zA-Z_][a-zA-Z0-9_]*)\s*=(?!=)'), 'variable'),
            ],
            '.js': [
                (brace_class, 'class'),
                (script_function, 'function'),
                (js_variable, 'variable'),
                (arrow_function, 'function'),
            ],
            '.ts': [
                (brace_class, 'class'),
                (script_function, 'function'),
                (typed_variable, 'variable'),
                (arrow_function, 'function'),
                (ts_interface, 'interface'),
            ],
            '.svelte': [
                (brace_class, 'class'),
                (script_function, 'function'),
                (typed_variable, 'variable'),
                (arrow_function, 'function'),
                # Svelte specific patterns
                (re.compile(r'\bexport\s+let\s+([a-zA-Z_][a-zA-Z0-9_]*)'), 'svelte_prop'),
            ],
            '.java': [
                (brace_class, 'class'),
                (re.compile(r'\b(?:public|private|protected)?\s+(?:static\s+)?[a-zA-Z_][a-zA-Z0-9_<>[\],\s]*\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\('), 'function'),
                (re.compile(r'\b(?:public|private|protected)?\s+(?:static\s+)?(?:final\s+)?[a-zA-Z_][a-zA-Z0-9_<>[\],\s]*\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=(?!=)'), 'variable'),
            ],
            '.cpp': [
                (cpp_class, 'class'),
                (c_function, 'function'),
                (c_variable, 'variable'),
            ],
            '.c': [
                (c_function, 'function'),
                (c_variable, 'variable'),
            ],
        }
        self.valid_extensions = set(self.patterns.keys())

    @staticmethod
    def _normalize_extensions(extensions):
        """Return requested extensions as a set of lower-case, dot-prefixed strings.

        Accepts an iterable of strings or one comma-separated string; blank
        entries are dropped, so ``['py', ' JS', '']`` gives ``{'.py', '.js'}``.
        """
        if not extensions:
            return set()
        if isinstance(extensions, str):
            extensions = extensions.split(',')
        cleaned = (ext.strip().lower() for ext in extensions)
        return {ext if ext.startswith('.') else f'.{ext}' for ext in cleaned if ext}

    def find_code_elements(self, file_path):
        """Extract declared names from a single file.

        Args:
            file_path: path (``str`` or ``os.PathLike``) of the file to scan.

        Returns:
            A sorted list of unique ``(file_type, element_type, element_name)``
            tuples, where ``file_type`` is the lower-cased extension.  The list
            is empty for unsupported extensions and for files that cannot be
            read (best effort: unreadable files are skipped, not fatal).
        """
        ext = os.path.splitext(file_path)[1].lower()
        if ext not in self.valid_extensions:
            return []
        patterns = self.patterns.get(ext) or ()

        try:
            # errors='ignore' drops undecodable bytes instead of raising.
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
        except (OSError, ValueError):
            return []

        results = set()
        for pattern, element_type in patterns:
            for match in pattern.finditer(content):
                name = match.group(1)
                if name in self._SKIP_NAMES or len(name) < self._MIN_NAME_LENGTH:
                    continue
                results.add((ext, element_type, name))

        # Sorted so repeated runs give the same order.
        return sorted(results)

    def scan_directory(self, repo_path, extensions=None, output_file=None):
        """Scan a directory tree and write the unique code elements to a CSV file.

        Args:
            repo_path: root directory to scan recursively.
            extensions: optional iterable of extensions (with or without the
                leading dot, any case, surrounding whitespace ignored) or one
                comma-separated string.  When empty or containing only blank
                entries, every supported extension is scanned.
            output_file: CSV path to write; defaults to
                ``code_elements_<YYYYmmdd_HHMMSS>.csv`` in the current directory.

        Returns:
            A human-readable status string describing either the saved file or
            the reason nothing was written.  Progress is printed to stdout.
        """
        repo = Path(repo_path)
        if not repo.is_dir():
            return "Invalid repository path"

        requested = self._normalize_extensions(extensions)
        if requested:
            target_extensions = requested & self.valid_extensions
        else:
            target_extensions = set(self.valid_extensions)

        if not target_extensions:
            return "No valid file extensions specified."

        print(f"Finding files with extensions: {', '.join(sorted(target_extensions))}...")
        # One walk of the tree; the suffix is compared case-insensitively to
        # match the lower-casing done in find_code_elements.
        files_to_process = sorted(
            path for path in repo.rglob('*')
            if path.suffix.lower() in target_extensions and path.is_file()
        )

        total_files = len(files_to_process)
        if total_files == 0:
            return "No matching files found."

        print(f"Processing {total_files} files...")

        # Create output file path if not provided
        if not output_file:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            output_file = f"code_elements_{timestamp}.csv"
        output_path = Path(output_file)

        results = set()
        for processed, file in enumerate(files_to_process, start=1):
            results.update(self.find_code_elements(file))
            if processed % 10 == 0 or processed == total_files:
                print(f"Processed {processed}/{total_files} files ({processed/total_files*100:.1f}%)")

        if not results:
            return "No code elements found."

        # Sorted rows make the CSV reproducible and diff-friendly.
        unique_results = sorted(results)

        try:
            with open(output_path, 'w', newline='', encoding='utf-8') as f:
                writer = csv.writer(f)
                writer.writerow(['File Type', 'Element Type', 'Element Name'])
                writer.writerows(unique_results)
        except OSError as e:
            return f"Error saving to file: {str(e)}"
        return f"Saved {len(unique_results)} unique elements to {output_path}"


if __name__ == "__main__":
    # Interactive entry point: ask for a directory and an extension filter,
    # run the scan and print its status message.
    finder = CodeElementFinder()
    repo_path = input("Enter repository path: ").strip()
    raw_extensions = input("Enter file extensions (comma separated, e.g., py,js,ts,svelte): ")
    # Trim each entry so "py, js" behaves like "py,js", and drop blanks so an
    # empty answer yields an empty list, which scan_directory treats as
    # "no filter" (all supported extensions) rather than the invalid entry ''.
    extensions = [ext.strip() for ext in raw_extensions.split(',') if ext.strip()]
    result = finder.scan_directory(repo_path, extensions)
    print(result)

Finding files with extensions: .ts, .py, .js, .svelte...
Processing 521 files...
Processed 10/521 files (1.9%)
Processed 20/521 files (3.8%)
Processed 30/521 files (5.8%)
Processed 40/521 files (7.7%)
Processed 50/521 files (9.6%)
Processed 60/521 files (11.5%)
Processed 70/521 files (13.4%)
Processed 80/521 files (15.4%)
Processed 90/521 files (17.3%)
Processed 100/521 files (19.2%)
Processed 110/521 files (21.1%)
Processed 120/521 files (23.0%)
Processed 130/521 files (25.0%)
Processed 140/521 files (26.9%)
Processed 150/521 files (28.8%)
Processed 160/521 files (30.7%)
Processed 170/521 files (32.6%)
Processed 180/521 files (34.5%)
Processed 190/521 files (36.5%)
Processed 200/521 files (38.4%)
Processed 210/521 files (40.3%)
Processed 220/521 files (42.2%)
Processed 230/521 files (44.1%)
Processed 240/521 files (46.1%)
Processed 250/521 files (48.0%)
Processed 260/521 files (49.9%)
Processed 270/521 files (51.8%)
Processed 280/521 files (53.7%)
Processed 290/521 files (55.7%)
Proce