In [1]:
import os
import re
from collections import defaultdict
import csv
from typing import Dict, Set, Tuple


def find_function_usage(directory: str) -> Tuple[Dict[str, Dict[str, int]], Set[str], Dict[str, str]]:
    """
    Find public functions defined in .py files under ``directory`` and count
    how often each one is called anywhere in the same tree.

    Detection is purely regex-based (no AST): a definition is a line starting
    with ``def name(`` or ``async def name(``, and a call is any occurrence of
    ``name`` followed by ``(`` once comments and string literals have been
    stripped. Definition lines themselves are not counted as calls. Names that
    start with an underscore (private helpers and dunder methods) are ignored.

    Args:
        directory: Root directory that is walked recursively.

    Returns:
        A tuple ``(usage_by_function, unused_functions, functions_map)``:
        - usage_by_function: function name -> {relative file path -> number
          of call sites in that file}. Only functions with at least one call
          site have an entry.
        - unused_functions: names of functions with no call site in any
          scanned file.
        - functions_map: function name -> relative path of the file defining
          it. If the same name is defined in several files, the last file
          scanned wins.
    """
    # 'def name(' or 'async def name(' at the start of a (possibly indented) line.
    definition_pattern = re.compile(
        r'^\s*(?:async\s+)?def\s+([a-zA-Z_]\w*)\s*\(',
        re.MULTILINE,
    )
    # Comments and string literals, removed before counting calls so that
    # names mentioned in docs or messages are not mistaken for calls.
    noise_pattern = re.compile(
        r'#.*$|"""[\s\S]*?"""|\'\'\'[\s\S]*?\'\'\'|"(?:[^"\\]|\\.)*"|\'(?:[^\'\\]|\\.)*\'',
        re.MULTILINE,
    )
    # 'def name' headers, removed so a definition is not counted as a call.
    def_header_pattern = re.compile(r'\bdef\s+[a-zA-Z_]\w*')
    # Any identifier immediately followed by an opening parenthesis.
    call_pattern = re.compile(r'\b([a-zA-Z_]\w*)(?=\s*\()')

    # File extensions to scan (only Python files)
    extensions = ('.py',)

    functions_map = {}  # name -> source file
    all_files = []

    # First pass: collect all function definitions
    print("Scanning for function definitions...")
    for root, _, files in os.walk(directory):
        for filename in files:
            if not filename.endswith(extensions):
                continue

            filepath = os.path.join(root, filename)
            relative_path = os.path.relpath(filepath, directory)
            all_files.append((relative_path, filepath))

            try:
                with open(filepath, 'r', encoding='utf-8') as file:
                    content = file.read()
            except (OSError, UnicodeDecodeError) as e:
                # Best effort: report the unreadable file and keep scanning.
                print(f"Error reading {filepath}: {e}")
                continue

            for match in definition_pattern.finditer(content):
                func_name = match.group(1)
                # Skip private functions and dunder methods (leading underscore)
                if not func_name.startswith('_'):
                    functions_map[func_name] = relative_path
                    print(f"Found function: {func_name} in {relative_path}")

    print(f"Found {len(functions_map)} functions")

    # Second pass: count call sites of each known function, file by file
    print("Scanning for usage...")
    usage_by_function = defaultdict(lambda: defaultdict(int))

    for relative_path, filepath in all_files:
        try:
            with open(filepath, 'r', encoding='utf-8') as file:
                content = file.read()
        except (OSError, UnicodeDecodeError) as e:
            print(f"Error reading {filepath}: {e}")
            continue

        code = noise_pattern.sub('', content)
        code = def_header_pattern.sub('', code)

        # One scan per file: look at every 'identifier(' and keep the ones
        # that name a known function.
        for match in call_pattern.finditer(code):
            name = match.group(1)
            if name in functions_map:
                usage_by_function[name][relative_path] += 1

    # A function is unused when no call site was recorded for it anywhere.
    unused_functions = {
        func_name for func_name in functions_map
        if func_name not in usage_by_function
    }

    return usage_by_function, unused_functions, functions_map


def write_results_to_csv(usage_data: Dict, unused_functions: Set, functions_map: Dict,
                         output_file: str = "function_usage_report.csv") -> None:
    """
    Dump the usage report as a CSV file, one row per known function.

    Rows are ordered by total call count, highest first. Each row holds the
    function name, its source file, the summed call count, a YES/NO unused
    flag and a comma-separated list of the files it was found in ("None"
    when there are none).
    """
    def call_total(name):
        # Sum of per-file counts; functions without usage data count as 0.
        return sum(usage_data.get(name, {}).values())

    def build_row(name):
        per_file = usage_data.get(name, {})
        if per_file:
            found_in = ", ".join(per_file.keys())
        else:
            found_in = "None"
        flag = "YES" if name in unused_functions else "NO"
        return [
            name,
            functions_map.get(name, "Unknown"),
            sum(per_file.values()),
            flag,
            found_in,
        ]

    header = ["Function Name", "Source File", "Total Calls", "Unused?", "Found In Files"]

    with open(output_file, 'w', newline='', encoding='utf-8') as handle:
        report = csv.writer(handle)
        report.writerow(header)
        ranked = sorted(functions_map.keys(), key=call_total, reverse=True)
        report.writerows(build_row(name) for name in ranked)

    print(f"Results written to {output_file}")


# Example usage: scan a directory tree and write the CSV report.
# Runs only when this file is executed directly (or as a notebook cell),
# not when it is imported as a module.
if __name__ == "__main__":
    directory = "."  # Current directory, change as needed
    # Returns (per-function usage counts, unused function names, name -> source file).
    usage_data, unused_funcs, funcs_map = find_function_usage(directory)
    # No output path is passed, so the report goes to the default
    # "function_usage_report.csv".
    write_results_to_csv(usage_data, unused_funcs, funcs_map)

Scanning for function definitions...
Found function: filter in webui_backend\config.py
Found function: run_migrations in webui_backend\config.py
Found function: load_json_config in webui_backend\config.py
Found function: save_to_db in webui_backend\config.py
Found function: reset_config in webui_backend\config.py
Found function: get_config in webui_backend\config.py
Found function: get_config_value in webui_backend\config.py
Found function: save_config in webui_backend\config.py
Found function: update in webui_backend\config.py
Found function: save in webui_backend\config.py
Found function: load_oauth_providers in webui_backend\config.py
Found function: google_oauth_register in webui_backend\config.py
Found function: microsoft_oauth_register in webui_backend\config.py
Found function: github_oauth_register in webui_backend\config.py
Found function: oidc_oauth_register in webui_backend\config.py
Found function: validate_cors_origins in webui_backend\config.py
Found function: validate_cor