In [None]:
import os
import json
import time
import asyncio
import aiohttp
import aiofiles
import logging
import subprocess
from pathlib import Path
from typing import List, Dict, Any, Tuple, Optional

# Logging: every record goes both to refactoring.log and to the notebook/console stream.
logging.basicConfig(
    handlers=[
        logging.FileHandler("refactoring.log"),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("refactoring")

# Per-token prices in USD, used to estimate the cost of each API call.
INPUT_COST_PER_TOKEN: float = 2e-6   # $2.00 per 1M prompt tokens
OUTPUT_COST_PER_TOKEN: float = 1e-5  # $10.00 per 1M completion tokens



# Configuration


# Runtime settings for the refactoring run. Every function in this notebook reads from CONFIG.
CONFIG: Dict[str, Any] = {
    # Chat-completions endpoint root: "https://openrouter.ai/api/v1" or "https://api.x.ai/v1".
    "base_url": "https://api.x.ai/v1",

    # SECURITY: the key is read from the environment and must never be stored in the notebook.
    # Any key that was previously committed in this cell should be treated as leaked and revoked.
    # When pointing base_url at another provider, export that provider's key under this variable.
    "api_key": os.environ.get("XAI_API_KEY", ""),

    # JSON state files (relative to the working directory) that make runs resumable.
    "progress_file": "refactoring_progress.json",
    "failed_file": "refactoring_failed.json",

    # Roots to scan, and sub-trees to leave untouched.
    "source_directories": [r"C:\Users\HJN\Desktop\open-webui\src"],
    "skip_directories": [r"C:\Users\HJN\Desktop\open-webui\src\tests", r"C:\Users\HJN\Desktop\open-webui\src\vendor"],

    "model": "grok-2-latest",  # "qwen/qwq-32b:free" or "grok-2-latest"

    "request_delay": 1.0,       # seconds to pause after each processed file
    "max_retries": 3,           # retries per file after the first attempt
    "retry_delay": 5,           # base seconds for exponential backoff between attempts
    "cost_budget": 10.0,        # USD; a value <= 0 disables the budget check
    "file_extensions": [".py", ".ts", ".js", ".svelte"],

    # Filled with str.format(content=<file text>); "{content}" is the only placeholder.
    "prompt_template": """Refactor provided code following these guidelines:



    Remove components: Delete all code related to Tika, document_intelligence, check_for_version_updates, playground, webhook_url, ldap, oauth, enable_community_sharing, share_chat, enable_channels, channels, channel_id, deepgram, webhook, proxy, youtube_proxy, non_local_voices, haptic, mobile, i18n references.


    Clean architecture:


        Organize imports (standard library, third-party, local).


        Use explicit imports, avoid wildcards.


        Remove unused imports, variables.


        Apply TypeScript types where applicable.


    Enhance documentation:


        Add JSDoc comments for TypeScript, Svelte, functions.


        Add docstrings for Python modules, classes, functions.


        Include parameter descriptions, return types, exceptions.


        Add HTML comments for component sections.


    Improve error handling:


        Add try/catch for async operations.


        Use specific exception types.


        Log errors appropriately.


        Provide fallback behavior.


    Optimize UI components:


        Add semantic HTML comments.


        Maintain consistent class naming, structure.


        Preserve functionality, improve readability.


    Code quality:


        Remove redundancies, dead code.


        Use clear variable names.


        Simplify complex expressions, conditionals.


        Apply consistent formatting.


    Key requirements:


        Preserve functionality.


        Keep original file type.


        Retain exported function names, public API.

        Replace all instances of 'open webui', 'open_webui', and similar variations with 'Chat App' or 'chat_app' while preserving the original formatting style.
        
        Remove i18n references, replace with direct text (e.g., $i18n.t('text') -> 'text').


    Naming: Use snake_case for functions.


    Code to refactor: [INSERT_CODE_HERE]


    {content}

    """,

    "auto_commit": True,            # commit refactored files to git automatically
    "auto_commit_interval": 4,      # number of modified files per commit
    "requests_per_minute": 60,      # pacing: each attempt first sleeps 60 / this value seconds
    "git_root_directory": r"C:\Users\HJN\Desktop\open-webui",
}



async def refactor_code(session: aiohttp.ClientSession, file_path: str, prompt_template: str) -> Tuple[str, Dict[str, Any]]:
    """Send one file to the chat-completions API and return the model's rewrite.

    Args:
        session: Open aiohttp session used for the POST request.
        file_path: Path of the source file whose text is sent to the model.
        prompt_template: ``str.format`` template with a ``{content}`` placeholder
            that receives the file text.

    Returns:
        ``(content, usage)`` on success, where ``content`` is the first choice's
        message content and ``usage`` is the response's ``usage`` object (``{}``
        when absent). ``("", {})`` on every failure path: missing or unreadable
        file, unusable template, truncated completion, or all attempts exhausted.
        The function never returns ``None``, so callers can always unpack the result.
    """
    if not os.path.exists(file_path):
        logger.warning(f"Skipping {file_path}: File doesn't exist")
        return "", {}

    # Keep the read in its own try so that only genuine I/O errors are reported as read errors.
    try:
        async with aiofiles.open(file_path, "r", encoding="utf-8", errors="replace") as file:
            code_content: str = await file.read()
    except Exception as e:
        logger.error(f"File read error {file_path}: {e}")
        return "", {}

    try:
        formatted_prompt: str = prompt_template.format(content=code_content)
    except (KeyError, IndexError, ValueError) as e:
        # Raised when the template contains stray braces or placeholders other than {content}.
        logger.error(f"Invalid prompt template for {file_path}: {e}")
        return "", {}

    max_retries: int = CONFIG["max_retries"]

    for attempt in range(max_retries + 1):
        try:
            # Pace requests so that at most requests_per_minute attempts are issued.
            await asyncio.sleep(60 / CONFIG["requests_per_minute"])
            logger.info(f"Processing {file_path} (attempt {attempt + 1})...")

            payload = {
                "model": CONFIG["model"],
                "messages": [{"role": "user", "content": formatted_prompt}],
                "temperature": 0.7,
                "max_tokens": 4000
            }

            headers = {
                "Authorization": f"Bearer {CONFIG['api_key']}",
                "Content-Type": "application/json"
            }

            async with session.post(
                f"{CONFIG['base_url']}/chat/completions",
                json=payload,
                headers=headers,
                # Explicit ClientTimeout instead of a bare number (the numeric form is deprecated).
                timeout=aiohttp.ClientTimeout(total=60)
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    logger.error(f"API error: {response.status}, {error_text}")
                else:
                    result: Dict[str, Any] = await response.json()
                    if "choices" not in result or not result["choices"]:
                        logger.error(f"Unexpected API response: {result}")
                    else:
                        choice = result["choices"][0]
                        if choice.get("finish_reason") == "length":
                            # The completion hit max_tokens, so the code is cut off. Writing it
                            # back would destroy the file, and retrying the same request would
                            # only be truncated again, so fail immediately.
                            logger.error(f"Completion truncated at max_tokens for {file_path}; not using partial output")
                            return "", {}
                        return choice["message"]["content"], result.get("usage", {})

        except Exception as e:
            logger.warning(f"Error refactoring {file_path} (attempt {attempt + 1}): {e}")

        # Reached after any failed attempt (HTTP error, malformed body, or exception):
        # back off exponentially before the next one.
        if attempt < max_retries:
            await asyncio.sleep(CONFIG["retry_delay"] * (2 ** attempt))

    logger.error(f"Failed {file_path} after {max_retries} retries")
    return "", {}


async def commit_files(files: List[str], git_root: str) -> None:
    """Stage the given files and create one git commit for them.

    Git is run with ``cwd=git_root`` rather than ``os.chdir``: changing the process
    working directory would silently redirect every later relative path (including
    the progress and failed-file JSON paths from CONFIG) into the repository.

    Args:
        files: Paths of the files to stage.
        git_root: Root directory of the git repository in which git is executed.

    Returns:
        None. Git failures are logged and not raised.
    """
    if not CONFIG["auto_commit"] or not files:
        logger.info("Auto-commit skipped: either disabled or no files to commit")
        return

    try:
        for file in files:
            relative_path: str = os.path.relpath(file, git_root)
            result = subprocess.run(
                # "--" ends option parsing so a path can never be read as a git flag.
                ["git", "add", "--", relative_path],
                cwd=git_root,
                check=True,
                capture_output=True,
                text=True
            )
            logger.debug(f"Git add output for {relative_path}: {result.stdout}")

        commit_message = f"Refactor {len(files)} files using AI"
        result = subprocess.run(
            ["git", "commit", "-m", commit_message],
            cwd=git_root,
            check=True,
            capture_output=True,
            text=True
        )
        logger.info(f"Successfully committed {len(files)} files: {result.stdout}")

    except subprocess.CalledProcessError as e:
        # Git reports some failures (e.g. nothing to commit) on stdout, leaving stderr empty.
        logger.error(f"Git commit failed: {e.stderr or e.stdout}")
    except Exception as e:
        logger.error(f"Unexpected error during git commit: {e}")


def scan_directories() -> List[str]:
    """Collect the files to refactor from the configured source directories.

    Walks every entry of ``CONFIG["source_directories"]`` recursively and keeps
    regular files whose name ends in one of ``CONFIG["file_extensions"]``,
    excluding anything located inside a ``CONFIG["skip_directories"]`` entry.

    Returns:
        File paths as strings, in discovery order (per source directory, then per
        extension), with each resolved file listed at most once.
    """
    file_paths: List[str] = []
    seen = set()  # resolved paths already collected; guards against overlapping roots
    skip_dirs: List[Path] = [Path(d).resolve() for d in CONFIG.get("skip_directories", [])]

    def _is_skipped(resolved: Path) -> bool:
        # is_relative_to is also true when the path equals the skip directory itself.
        return any(resolved.is_relative_to(skip_dir) for skip_dir in skip_dirs)

    for directory in CONFIG["source_directories"]:
        dir_path: Path = Path(directory)
        if not dir_path.is_dir():
            logger.warning(f"Directory not found or not a directory: {directory}")
            continue

        if _is_skipped(dir_path.resolve()):
            logger.info(f"Skipping directory {directory} as it matches a skip_directories entry")
            continue

        for ext in CONFIG["file_extensions"]:
            for file_path in dir_path.glob(f"**/*{ext}"):
                # The glob also matches directories whose name ends with the
                # extension (e.g. a "chart.js" folder); those cannot be refactored.
                if not file_path.is_file():
                    continue

                file_resolved = file_path.resolve()
                if _is_skipped(file_resolved):
                    logger.debug(f"Skipping file {file_path} due to being in a skipped directory")
                    continue

                if file_resolved in seen:
                    continue
                seen.add(file_resolved)
                file_paths.append(str(file_path))

    logger.info(f"Found {len(file_paths)} files to process after applying skip rules")
    return file_paths


async def load_progress_data() -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Read the persisted run state, falling back to empty defaults.

    Returns:
        ``(progress_data, failed_data)``. Each starts from its default structure
        and is updated with the JSON stored in the matching CONFIG file when that
        file exists and is non-blank. Load errors are logged as warnings and the
        defaults are kept.
    """
    progress_data: Dict[str, Any] = {"processed_files": [], "total_cost": 0.0}
    failed_data: Dict[str, Any] = {"failed_files": {}}

    targets = (
        (CONFIG["progress_file"], progress_data),
        (CONFIG["failed_file"], failed_data),
    )
    for state_path, state in targets:
        if not os.path.exists(state_path):
            continue
        try:
            async with aiofiles.open(state_path, "r") as handle:
                raw_text = await handle.read()
                # A blank file is treated the same as a missing one.
                if raw_text.strip():
                    state.update(json.loads(raw_text))
        except Exception as e:
            logger.warning(f"Error loading {state_path}: {e}")

    return progress_data, failed_data


async def save_progress_data(progress_data: Dict[str, Any], failed_data: Dict[str, Any]) -> None:
    """Persist the progress and failed-file state as JSON.

    Each file is written to a sibling ``<name>.tmp`` first and then moved into
    place with ``os.replace``, so an interrupted write (for example a kernel
    interrupt) cannot leave a half-written state file behind.

    Args:
        progress_data: State written to ``CONFIG["progress_file"]``.
        failed_data: State written to ``CONFIG["failed_file"]``.

    Returns:
        None. Errors are logged and not raised.
    """
    for file, data in [(CONFIG["progress_file"], progress_data), (CONFIG["failed_file"], failed_data)]:
        temp_file = f"{file}.tmp"
        try:
            # Serialise before touching the disk so a non-serialisable value cannot
            # clobber the existing file.
            serialized = json.dumps(data, indent=2)
            async with aiofiles.open(temp_file, "w") as f:
                await f.write(serialized)
            os.replace(temp_file, file)
        except Exception as e:
            logger.error(f"Error saving to {file}: {e}")


async def _record_failure(progress_data: Dict[str, Any], failed_data: Dict[str, Any], file_path: str, reason: str) -> None:
    """Mark ``file_path`` as failed with ``reason`` and persist both state files."""
    failed_data["failed_files"][file_path] = {"timestamp": time.time(), "reason": reason}
    await save_progress_data(progress_data, failed_data)


async def process_files(num_files: Optional[int] = None) -> None:
    """Refactor not-yet-processed files and overwrite them with the model output.

    For each file: read it, request a refactoring, add the request's cost to the
    running total, write the result back, record progress, and commit to git in
    batches of ``CONFIG["auto_commit_interval"]`` modified files.

    The cost budget is checked *before* each request. A request that has already
    been paid for is always accounted for and its result kept, so the recorded
    total matches what was actually spent.

    Args:
        num_files: Maximum number of unprocessed files to attempt in this call;
            ``None`` attempts all of them.

    Returns:
        None. Progress and failures are persisted through ``save_progress_data``.
    """
    if not CONFIG["api_key"]:
        logger.error("API_KEY not set in CONFIG")
        return

    file_paths: List[str] = scan_directories()
    progress_data, failed_data = await load_progress_data()
    prompt_template: str = CONFIG["prompt_template"]
    git_root: str = CONFIG["git_root_directory"]
    cost_budget: float = CONFIG["cost_budget"]

    # Use a local flag rather than mutating CONFIG, so one run against a non-repo
    # does not silently disable auto-commit for later runs in the same kernel.
    auto_commit: bool = CONFIG["auto_commit"]
    if not os.path.exists(os.path.join(git_root, ".git")):
        logger.warning(f"Git root {git_root} is not a git repository. Disabling auto-commit.")
        auto_commit = False

    already_processed = set(progress_data["processed_files"])  # O(1) membership tests
    unprocessed_files = [f for f in file_paths if f not in already_processed]
    logger.info(f"Found {len(unprocessed_files)} unprocessed files out of {len(file_paths)} total files")

    files_to_process = unprocessed_files[:num_files] if num_files is not None else unprocessed_files
    logger.info(f"Processing {len(files_to_process)} files")

    if not files_to_process:
        logger.info("No files to process")
        return

    modified_files: List[str] = []
    processed_count: int = 0

    async with aiohttp.ClientSession() as session:
        for file_path in files_to_process:
            # Stop before spending more once the budget is used up.
            if cost_budget > 0 and progress_data["total_cost"] >= cost_budget:
                logger.warning(f"Cost budget reached: ${progress_data['total_cost']:.6f} >= ${cost_budget:.6f}")
                break

            try:
                async with aiofiles.open(file_path, "r", encoding="utf-8", errors="replace") as f:
                    original_code: str = await f.read()
            except Exception as e:
                logger.error(f"Error reading {file_path}: {e}")
                await _record_failure(progress_data, failed_data, file_path, str(e))
                continue

            refactored_code, usage = await refactor_code(session, file_path, prompt_template)
            if not refactored_code:
                logger.error(f"Refactoring failed for {file_path}")
                await _record_failure(progress_data, failed_data, file_path, "Refactoring failed")
                continue

            # The request has been billed at this point, so account for it
            # regardless of whether the write below succeeds.
            prompt_tokens: int = usage.get("prompt_tokens", 0)
            completion_tokens: int = usage.get("completion_tokens", 0)
            file_cost: float = (prompt_tokens * INPUT_COST_PER_TOKEN) + (completion_tokens * OUTPUT_COST_PER_TOKEN)
            total_cost: float = progress_data["total_cost"] + file_cost
            progress_data["total_cost"] = total_cost

            try:
                async with aiofiles.open(file_path, "w", encoding="utf-8") as f:
                    await f.write(refactored_code)
                logger.info(f"Refactored {file_path}")
            except Exception as e:
                logger.error(f"Error writing to {file_path}: {e}")
                await _record_failure(progress_data, failed_data, file_path, str(e))
                continue

            if original_code != refactored_code:
                modified_files.append(file_path)

            progress_data["processed_files"].append(file_path)
            # A file that failed in an earlier run and succeeds now is no longer failed.
            failed_data["failed_files"].pop(file_path, None)
            processed_count += 1
            logger.info(f"Processed {file_path} - Cost: ${file_cost:.6f}, Total: ${total_cost:.6f}")

            if auto_commit and len(modified_files) >= CONFIG["auto_commit_interval"]:
                await commit_files(modified_files, git_root)
                modified_files = []

            await save_progress_data(progress_data, failed_data)
            await asyncio.sleep(CONFIG["request_delay"])

        # Commit whatever is left over from the last, partially filled batch.
        if auto_commit and modified_files:
            logger.info(f"Final commit of {len(modified_files)} files")
            await commit_files(modified_files, git_root)

    logger.info(f"Completed processing {processed_count} files")

In [2]:
# Run the pipeline on at most one unprocessed file (num_files=1 caps the batch).
await process_files(num_files=1)

2025-03-09 19:39:09,488 - refactoring - INFO - Found 372 files to process
2025-03-09 19:39:09,492 - refactoring - INFO - Found 371 unprocessed files out of 372 total files
2025-03-09 19:39:09,493 - refactoring - INFO - Processing 1 files
2025-03-09 19:39:10,496 - refactoring - INFO - Processing C:\Users\HJN\Desktop\open-webui\src\lib\constants.ts (attempt 1)...
2025-03-09 19:40:17,083 - refactoring - INFO - Processing C:\Users\HJN\Desktop\open-webui\src\lib\constants.ts (attempt 2)...
2025-03-09 19:40:58,133 - refactoring - INFO - Refactored C:\Users\HJN\Desktop\open-webui\src\lib\constants.ts
2025-03-09 19:40:58,135 - refactoring - INFO - Processed C:\Users\HJN\Desktop\open-webui\src\lib\constants.ts - Cost: $0.026330, Total: $0.052500
2025-03-09 19:40:59,146 - refactoring - INFO - Final commit of 1 files
2025-03-09 19:40:59,441 - refactoring - INFO - Successfully committed 1 files: [main fb528b7c8] Refactor 1 files using AI
 1 file changed, 41 insertions(+), 7 deletions(-)

2025-03-0