In [None]:
import glob
import subprocess
import os
import io
import tokenize
import ast
import json
import re
import threading
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm


def _is_string_only_statement(tokens, index):
    """Return True if the statement starting at ``tokens[index]`` is only string literals.

    Adjacent STRING tokens (implicit concatenation) and a trailing COMMENT are
    skipped; the statement qualifies only when the next token is the NEWLINE
    that ends the logical line. The token stream always ends with ENDMARKER,
    so the scan cannot run off the end of the list.
    """
    skippable = (tokenize.STRING, tokenize.COMMENT)
    while tokens[index].type in skippable:
        index += 1
    return tokens[index].type == tokenize.NEWLINE


def remove_comments_and_docstrings(source: str) -> str:
    """Remove comments and docstrings from Python source code.

    The source is tokenized, COMMENT tokens and string-only statements are
    dropped, and the remaining tokens are turned back into text.

    Args:
        source: Python source code.

    Returns:
        The stripped source with blank lines removed and no trailing newline.
        Tokens are rebuilt from ``(type, string)`` pairs, so spacing between
        tokens may differ from the input.

    Raises:
        tokenize.TokenError, SyntaxError: if ``source`` cannot be tokenized.
    """
    tokens = list(tokenize.generate_tokens(io.StringIO(source).readline))
    statement_start = {tokenize.INDENT, tokenize.NEWLINE}
    layout_only = {tokenize.NEWLINE, tokenize.NL, tokenize.ENDMARKER}

    output_tokens = []
    # One flag per currently open indented block: True once the block has kept
    # at least one real statement token.
    open_blocks = []
    # Starting at INDENT makes a string on the very first line count as a
    # (module) docstring.
    prev_toktype = tokenize.INDENT

    for index, tok in enumerate(tokens):
        token_type, token_string = tok.type, tok.string

        if token_type == tokenize.COMMENT:
            continue

        # Drop a string only when it is a complete statement on its own.
        # Something like `"".join(xs)` also starts with a STRING token but
        # must be kept, otherwise the remaining tokens no longer parse.
        if (token_type == tokenize.STRING
                and prev_toktype in statement_start
                and _is_string_only_statement(tokens, index)):
            continue

        if token_type == tokenize.INDENT:
            open_blocks.append(False)
        elif token_type == tokenize.DEDENT:
            # A block whose only content was a docstring would be left with an
            # empty body (a syntax error), so give it a `pass`.
            if not open_blocks.pop():
                output_tokens.append((tokenize.NAME, 'pass'))
                output_tokens.append((tokenize.NEWLINE, '\n'))
        elif token_type not in layout_only and open_blocks:
            open_blocks[-1] = True

        output_tokens.append((token_type, token_string))
        # NL marks blank or comment-only lines; leaving prev_toktype untouched
        # lets a docstring that follows a shebang or comment header still be
        # recognised as the first statement.
        if token_type != tokenize.NL:
            prev_toktype = token_type

    # Convert tokens back to source code
    new_source = tokenize.untokenize(output_tokens)
    # Remove empty lines
    return '\n'.join(line for line in new_source.splitlines() if line.strip())


def process_python_file(file_path: str):
    """Strip comments/docstrings from a Python file and overwrite it in place.

    The stripped text is parsed before anything is written, so a file is never
    replaced by output that is no longer valid Python.

    Args:
        file_path: Path of the ``.py`` file to rewrite.

    Returns:
        True if the file was rewritten, False if reading, stripping, validation
        or writing failed (the error is printed and the file is left as it was
        unless the failure happened during the write itself).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            original_code = f.read()
        stripped_code = remove_comments_and_docstrings(original_code)
        # Validate first: the file is overwritten in place, so a broken result
        # would destroy the only copy of the source.
        ast.parse(stripped_code, filename=file_path)
        # Save back to the original file
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(stripped_code)
        return True
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return False


def clean_python_files(target_dir):
    """Strip comments/docstrings from every ``.py`` file under ``target_dir``.

    Each file is first rewritten by ``process_python_file`` and then passed to
    the external ``autoflake`` command to drop unused imports and variables.
    If ``autoflake`` is not installed, a warning is printed once and the
    remaining files are still stripped; only the autoflake step is skipped.

    Args:
        target_dir: Directory searched recursively for ``*.py`` files.

    Returns:
        The list of Python file paths that were found.
    """
    print("Phase 1: Cleaning Python files...")
    py_files = glob.glob(os.path.join(target_dir, '**', '*.py'), recursive=True)

    autoflake_cmd = ['autoflake', '--in-place', '--remove-all-unused-imports',
                     '--remove-unused-variables']
    autoflake_available = True

    # Process files with a progress bar
    with tqdm(total=len(py_files), desc="Removing comments & docstrings", unit="file") as pbar:
        for file in py_files:
            # Step 1: Remove comments & docstrings
            process_python_file(file)
            # Step 2: Run autoflake to clean up unused imports/variables
            if autoflake_available:
                try:
                    subprocess.run([*autoflake_cmd, file], check=True, capture_output=True)
                except subprocess.CalledProcessError as e:
                    print(f"Warning: autoflake failed on {file}: {e}")
                except FileNotFoundError:
                    print("Warning: autoflake not found. Install with 'pip install autoflake' to remove unused imports.")
                    # Stop retrying autoflake, but keep stripping the other
                    # files; aborting the loop here would leave them untouched.
                    autoflake_available = False
            pbar.update(1)

    print(f"Processed {len(py_files)} Python files.")
    return py_files


def extract_definitions(file_path):
    """Collect function names and simple assignment targets from a Python file.

    Args:
        file_path: Path of the Python file to parse.

    Returns:
        A dict mapping each name to ``{'type': 'function' | 'variable',
        'context': <full file source>}``. Only ``def`` statements and plain
        ``name = ...`` targets are recorded. An empty dict is returned (and the
        error printed) if the file cannot be read or parsed.
    """
    def named_items(tree):
        # Yield (identifier, kind) pairs in ast.walk order.
        for node in ast.walk(tree):
            if isinstance(node, ast.FunctionDef):
                yield node.name, 'function'
            elif isinstance(node, ast.Assign):
                for target in node.targets:
                    if isinstance(target, ast.Name) and isinstance(target.ctx, ast.Store):
                        yield target.id, 'variable'

    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            code = handle.read()
        # A later occurrence of the same identifier overwrites the earlier one.
        return {
            identifier: {'type': kind, 'context': code}
            for identifier, kind in named_items(ast.parse(code))
        }
    except Exception as e:
        print(f"Error extracting definitions from {file_path}: {e}")
        return {}


def suggest_better_name(name, def_type, context):
    """Suggest a snake_case replacement for ``name``.

    Args:
        name: Identifier to convert.
        def_type: Kind of definition; accepted for interface compatibility
            but not used.
        context: Source the name came from; accepted for interface
            compatibility but not used.

    Returns:
        The snake_case form of ``name``, or ``name`` unchanged when it is
        all-uppercase (treated as a constant), starts with an underscore, is
        already snake_case, or when the converted form would be a Python
        keyword or the name of a builtin.
    """
    import builtins
    import keyword

    # Constants and private/dunder names are left alone.
    if name.isupper() or name.startswith('_'):
        return name

    # Convert camelCase/PascalCase to snake_case. `[^_]` keeps an existing
    # underscore from being doubled (My_Var -> my_var, not my__var).
    s1 = re.sub(r'([^_])([A-Z][a-z]+)', r'\1_\2', name)
    snake_case = re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', s1).lower()

    if snake_case == name:
        return name

    # Lower-casing can produce a reserved word (Lambda -> lambda) or shadow a
    # builtin (List -> list); applying such a rename would break the code.
    if keyword.iskeyword(snake_case) or hasattr(builtins, snake_case):
        return name

    return snake_case


def generate_rename_map(directory, map_file):
    """Build a map from existing names to snake_case names and save it as JSON.

    Every ``.py`` file under ``directory`` is scanned with
    ``extract_definitions`` and each name found is passed to
    ``suggest_better_name``. A rename is skipped when its target is already a
    name reported by ``extract_definitions`` or is already the target of
    another rename, because applying it would merge two distinct identifiers.
    The collision check only covers names that ``extract_definitions`` reports.

    Args:
        directory: Root directory to scan.
        map_file: Path of the JSON file the map is written to.

    Returns:
        A dict mapping old names to new names.
    """
    print("\nPhase 2: Generating rename map...")

    # Get all Python files
    py_files = [os.path.join(root, file)
                for root, _, files in os.walk(directory)
                for file in files if file.endswith('.py')]

    # Process files with a progress bar
    all_definitions = {}
    with tqdm(total=len(py_files), desc="Extracting definitions", unit="file") as pbar:
        for file_path in py_files:
            all_definitions[file_path] = extract_definitions(file_path)
            pbar.update(1)

    defined_names = {name for defs in all_definitions.values() for name in defs}

    rename_map = {}
    claimed_targets = set()
    skipped_names = set()
    for defs in all_definitions.values():
        for name, info in defs.items():
            if name in rename_map:  # Only suggest once per unique name
                continue
            new_name = suggest_better_name(name, info['type'], info['context'])
            if new_name == name or new_name.startswith('_'):  # Avoid private names
                continue
            # Renaming onto an existing or already-claimed name would make two
            # different identifiers indistinguishable after the rewrite.
            if new_name in defined_names or new_name in claimed_targets:
                skipped_names.add(name)
                continue
            rename_map[name] = new_name
            claimed_targets.add(new_name)

    if skipped_names:
        print(f"Skipped {len(skipped_names)} renames that would collide with existing names.")

    # Save to JSON
    with open(map_file, 'w', encoding='utf-8') as f:
        json.dump(rename_map, f, indent=4)

    print(f"Generated rename map with {len(rename_map)} entries, saved to {map_file}")
    return rename_map


def replace_in_file(file_path, rename_map):
    """Replace whole-word occurrences of the map's keys in a file.

    Matching is purely textual (regex with ``\\b`` word boundaries), so
    occurrences inside string literals and comments are replaced as well.

    Args:
        file_path: Path of the file to rewrite in place.
        rename_map: Dict mapping old names to new names.

    Returns:
        True if the file content changed and was written back; False if
        nothing changed, the map is empty, or an error occurred (the error is
        printed).
    """
    # With no keys the alternation would be empty and match the empty string
    # at every word boundary, making the replacer fail with KeyError('').
    if not rename_map:
        return False

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Create a regex pattern that matches any of the keys with word boundaries
        pattern = r'\b(?:' + '|'.join(re.escape(key) for key in rename_map) + r')\b'
        new_content = re.sub(pattern, lambda match: rename_map[match.group(0)], content)

        # Only write if content changed
        if new_content == content:
            return False
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(new_content)
        return True
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return False


def process_directory(directory, rename_map):
    """Apply ``rename_map`` to every ``.py`` file under ``directory`` using a thread pool.

    Args:
        directory: Root directory walked for Python files.
        rename_map: Dict mapping old names to new names, passed to
            ``replace_in_file`` for each file.

    Returns:
        The number of files whose content changed, or None when the directory
        contains no Python files.
    """
    print("\nPhase 3: Applying renames to files...")
    py_files = []
    for root, _, files in os.walk(directory):
        py_files.extend(os.path.join(root, name) for name in files if name.endswith('.py'))

    if not py_files:
        print("No Python files found in the directory.")
        return

    # One slot per file; each worker writes only to its own index.
    outcomes = [False] * len(py_files)

    def rename_one(slot, path):
        outcomes[slot] = replace_in_file(path, rename_map)

    worker_count = os.cpu_count() or 4
    with tqdm(total=len(py_files), desc="Replacing variables/functions", unit="file") as pbar:
        def advance(_finished):
            # Tick the progress bar once per finished task.
            pbar.update(1)

        with ThreadPoolExecutor(max_workers=worker_count) as executor:
            pending = []
            for slot, path in enumerate(py_files):
                task = executor.submit(rename_one, slot, path)
                task.add_done_callback(advance)
                pending.append(task)

            # Block until every task is done, reporting any that raised.
            for task in pending:
                try:
                    task.result()
                except Exception as e:
                    print(f"Thread error: {e}")

    changed_files = sum(outcomes)
    print(f"Completed! Changed {changed_files} of {len(py_files)} files.")
    return changed_files


def main():
    """Run the interactive cleanup-and-rename workflow.

    Prompts for a project directory, optionally copies it to a timestamped
    sibling backup directory, strips comments/docstrings, generates the rename
    map (saved as ``rename_map.json`` next to the project directory), shows a
    preview of up to ten renames and, after confirmation, applies them.
    """
    print("=== Python Code Cleanup and Rename Tool ===")
    entered_dir = input("Enter your project directory: ").strip()
    # Normalise the path so a trailing separator ("proj/") cannot place the
    # backup and the map file inside the project itself, where the later
    # phases would modify them. Empty input is left as-is so it is rejected
    # below instead of resolving to the current directory.
    target_dir = os.path.abspath(entered_dir) if entered_dir else entered_dir

    if not os.path.isdir(target_dir):
        print(f"Error: '{target_dir}' is not a valid directory.")
        return

    # Create a backup first
    backup_option = input("Do you want to create a backup before proceeding? (y/n): ").strip().lower()
    if backup_option == 'y':
        import shutil
        from datetime import datetime

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_dir = f"{target_dir}_backup_{timestamp}"
        print(f"Creating backup to {backup_dir}...")
        shutil.copytree(target_dir, backup_dir)
        print("Backup created!")

    # Clean Python files (remove comments, docstrings, unused imports)
    clean_python_files(target_dir)

    # Generate rename map
    map_file = os.path.join(os.path.dirname(target_dir), "rename_map.json")
    rename_map = generate_rename_map(target_dir, map_file)

    # Nothing to preview or apply; skip the confirmation prompt and phase 3.
    if not rename_map:
        print("\nNo renames suggested; nothing to apply.")
        return

    # Preview changes
    print("\nPreview of renames:")
    for i, (old, new) in enumerate(list(rename_map.items())[:10], 1):
        print(f"{i}. {old} -> {new}")

    if len(rename_map) > 10:
        print(f"... and {len(rename_map) - 10} more renames")

    # Ask to proceed
    proceed = input("\nDo you want to proceed with these renames? (y/n): ").strip().lower()
    if proceed != 'y':
        print("Rename operation cancelled.")
        return

    # Apply renames
    process_directory(target_dir, rename_map)
    print("\nAll operations completed!")


# Start the interactive tool only when this file is run as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()