In [None]:
import os
import json
import time
import asyncio
import aiohttp
import aiofiles
import logging
import subprocess
from pathlib import Path
from typing import List, Dict, Any, Tuple, Optional

# Logging: every record goes both to refactoring.log and to the console stream.
logging.basicConfig(
    handlers=[
        logging.FileHandler("refactoring.log"),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("refactoring")

# Pricing, expressed in US dollars per single token.
INPUT_COST_PER_TOKEN: float = 2.00 / 1_000_000  # prompt tokens: $2.00 per million
OUTPUT_COST_PER_TOKEN: float = 10.00 / 1_000_000  # completion tokens: $10.00 per million

# Configuration
# SECURITY: the API key is read from the XAI_API_KEY environment variable and must
# never be committed as a literal. Any key that has ever appeared in this file
# should be treated as leaked and revoked.
CONFIG: Dict[str, Any] = {
    # API endpoint and credentials
    "base_url": "https://api.x.ai/v1",
    "api_key": os.environ.get("XAI_API_KEY", ""),  # empty string -> process_files() refuses to run
    # State files (relative paths, resolved against the current working directory)
    "progress_file": "refactoring_progress.json",
    "failed_file": "refactoring_failed.json",
    # Where to look for files, and which sub-trees to leave untouched
    "source_directories": [r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib"],
    "skip_directories": [
        r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\i18n",
        r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\channel",
        r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\playground",
        r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\icons"
    ],
    # Model and request limits
    "model": "grok-2-latest",
    "max_context_tokens": 131072,
    "request_delay": 1.0,   # seconds slept after each successfully processed file
    "max_retries": 1,       # extra attempts after the first request
    "retry_delay": 5,       # base back-off in seconds, doubled on each attempt
    "timeout": 120,         # total seconds allowed per HTTP request
    "cost_budget": 3.0,     # US dollars; values <= 0 disable the budget check
    "file_extensions": [".py", ".ts", ".js", ".svelte"],
    "prompt_template": """Refactor the provided code according to these guidelines and return only the modified code:

- Remove all code related to: Tika, document_intelligence, check_for_version_updates, playground, webhook_url, ldap, oauth, enable_community_sharing, share_chat, enable_channels, channels, channel_id, deepgram, webhook, proxy, youtube_proxy, non_local_voices, haptic, mobile, i18n, getVersionUpdates
- Organize imports: standard library first, then third-party, then local; use explicit imports (no wildcards); remove unused imports
- Add TypeScript types where applicable
- Add documentation:
  * Single-line JSDoc comments (/** Comment */) for simple JavaScript/TypeScript functions or sections
  * Multi-line JSDoc (/** ... */) for JavaScript/TypeScript/Svelte functions with params, returns, etc.
  * Single-line docstrings ('Comment') for simple Python functions or sections
  * Multi-line docstrings (''' ... ''') for Python modules/classes/functions with params, returns, exceptions
  * HTML comments for component sections
- Error handling: add try/catch for async ops, use specific exceptions, log errors, add fallbacks
- UI components: add semantic HTML comments, consistent class naming, maintain functionality
- Code quality: remove dead code/redundancies, use clear variable names, simplify expressions, consistent formatting
- Requirements:
  * Preserve functionality and original file type
  * Keep exported function names and public API
  * Replace i18n references with direct text (e.g., $i18n.t('text') -> 'text')

Input code:
{content}

Output only the refactored code without any explanations or additional text.""",
    # Git integration
    "auto_commit": True,
    "auto_commit_interval": 4,  # commit once this many modified files have accumulated
    "requests_per_minute": 60,
    "git_root_directory": r"C:\Users\harold.noble\Desktop\RIC",
    "max_input_tokens": 120000
}


def estimate_tokens(text: str) -> int:
    """Return a rough token count for ``text``, assuming about four characters per token.

    The estimate is never lower than 1, even for an empty string.
    """
    full_groups, _ = divmod(len(text), 4)
    return full_groups + 1


async def refactor_code(session: aiohttp.ClientSession, file_path: str, prompt_template: str) -> Tuple[str, Dict[str, Any]]:
    """Send one file to the chat-completions endpoint and return the model's rewrite.

    Args:
        session: Open aiohttp session used for the POST request.
        file_path: Path of the file whose text is substituted into the prompt.
        prompt_template: Prompt text containing a ``{content}`` placeholder.

    Returns:
        ``(refactored_code, usage)`` on success, where ``usage`` is the ``usage``
        object of the API response (``{}`` when absent).
        ``("", {})`` when the file cannot be read, is too large, or the request
        fails after all retries.
        ``("", usage)`` when the API answered but the completion was cut off at
        the token limit; the text is withheld so a truncated file is never
        written back, while ``usage`` still reports what the call consumed.

    The function never raises for ordinary errors; failures are logged instead.
    """
    # Statuses worth retrying: rate limiting plus transient gateway/server errors.
    retryable_statuses = (429, 500, 502, 503, 504, 520)

    try:
        async with aiofiles.open(file_path, "r", encoding="utf-8", errors="replace") as file:
            code_content: str = await file.read()
    except Exception as e:
        logger.error(f"File access error {file_path}: {e}")
        return "", {}

    try:
        input_tokens: int = estimate_tokens(code_content)
        prompt_base_tokens: int = estimate_tokens(prompt_template.format(content=""))
        total_input_tokens: int = input_tokens + prompt_base_tokens

        if total_input_tokens > CONFIG["max_input_tokens"]:
            logger.error(f"File {file_path} exceeds input limit: {total_input_tokens} > {CONFIG['max_input_tokens']}")
            return "", {}

        if total_input_tokens >= CONFIG["max_context_tokens"]:
            logger.error(f"File {file_path} too large: {total_input_tokens} tokens >= {CONFIG['max_context_tokens']}")
            return "", {}

        formatted_prompt: str = prompt_template.format(content=code_content)
        # Leave a 1000-token safety margin because the input size is only an estimate.
        max_output_tokens: int = CONFIG["max_context_tokens"] - total_input_tokens - 1000

        if max_output_tokens < 1000:
            logger.error(f"Insufficient output token budget for {file_path}: {max_output_tokens}")
            return "", {}

        headers = {
            "Authorization": f"Bearer {CONFIG['api_key']}",
            "Content-Type": "application/json"
        }

        payload = {
            "model": CONFIG["model"],
            "messages": [{"role": "user", "content": formatted_prompt}],
            "temperature": 0.7,
            "max_tokens": max_output_tokens
        }

        for attempt in range(CONFIG["max_retries"] + 1):
            can_retry = attempt < CONFIG["max_retries"]
            backoff = CONFIG["retry_delay"] * (2 ** attempt)  # exponential back-off
            try:
                # Simple client-side pacing so requests stay under requests_per_minute.
                await asyncio.sleep(60 / CONFIG["requests_per_minute"])
                logger.info(f"Processing {file_path} (attempt {attempt + 1})")

                async with session.post(
                    f"{CONFIG['base_url']}/chat/completions",
                    json=payload,
                    headers=headers,
                    timeout=aiohttp.ClientTimeout(total=CONFIG["timeout"])
                ) as response:
                    if response.status != 200:
                        error_text = await response.text()
                        logger.error(f"API error {response.status} for {file_path}: {error_text[:200]}")
                        if response.status in retryable_statuses and can_retry:
                            await asyncio.sleep(backoff)
                            continue
                        return "", {}

                    result: Dict[str, Any] = await response.json()
                    if "choices" not in result or not result["choices"]:
                        logger.error(f"Invalid API response for {file_path}: {result}")
                        return "", {}

                    choice = result["choices"][0]
                    usage = result.get("usage") or {}

                    # A completion that stopped at the token limit is incomplete code;
                    # returning it would let the caller overwrite the file with a fragment.
                    if choice.get("finish_reason") == "length":
                        logger.error(f"Truncated completion for {file_path}: output reached max_tokens={max_output_tokens}")
                        return "", usage

                    return choice["message"]["content"], usage

            except asyncio.TimeoutError:
                logger.warning(f"Timeout for {file_path} (attempt {attempt + 1})")
                if can_retry:
                    await asyncio.sleep(backoff)
            except Exception as e:
                logger.error(f"Error for {file_path} (attempt {attempt + 1}): {e}")
                if can_retry:
                    await asyncio.sleep(backoff)
                else:
                    return "", {}

        # Reached when the final attempt timed out.
        return "", {}

    except Exception as e:
        # Anything outside the request itself (prompt formatting, configuration lookups).
        logger.error(f"Unexpected error for {file_path}: {e}")
        return "", {}


async def commit_files(files: List[str], git_root: str) -> None:
    """Stage and commit the given files in the git repository at ``git_root``.

    Does nothing when ``CONFIG["auto_commit"]`` is false or ``files`` is empty.
    Failures (git exiting non-zero, git not being runnable, an invalid
    ``git_root``, or a path that cannot be made relative to it) are logged and
    not raised.

    Args:
        files: Paths of the files to stage; they are passed to git relative to
            ``git_root``.
        git_root: Directory in which the git commands are executed.
    """
    if not CONFIG["auto_commit"] or not files:
        return

    try:
        relative_paths = [os.path.relpath(f, git_root) for f in files]
        # cwd= confines the directory change to the git child processes. Calling
        # os.chdir() here would move the whole process and silently relocate every
        # relative path used afterwards (progress/failed JSON files, the log file).
        subprocess.run(["git", "add"] + relative_paths, cwd=git_root, check=True, capture_output=True, text=True)
        subprocess.run(["git", "commit", "-m", f"Refactor {len(files)} files using AI"], cwd=git_root, check=True, capture_output=True, text=True)
        logger.info(f"Committed {len(files)} files")
    except subprocess.CalledProcessError as e:
        # git reports some conditions (e.g. "nothing to commit") on stdout only.
        logger.error(f"Git commit failed: {e.stderr or e.stdout}")
    except (OSError, ValueError) as e:
        # OSError: git executable or git_root missing; ValueError: relpath across drives.
        logger.error(f"Git commit failed: {e}")


def scan_directories() -> List[str]:
    """Collect the files to refactor from ``CONFIG["source_directories"]``.

    Every source directory is searched recursively once per extension in
    ``CONFIG["file_extensions"]``. Anything located under one of
    ``CONFIG["skip_directories"]`` is left out, as are directories whose names
    happen to end in a listed extension. Each path is returned once, in
    discovery order, even when source directories overlap.

    Returns:
        Resolved file paths as strings.
    """
    file_paths: List[str] = []
    seen = set()  # paths already collected, for O(1) duplicate checks
    skip_dirs: List[Path] = [Path(d).resolve() for d in CONFIG["skip_directories"]]

    def is_skipped(path: Path) -> bool:
        """Return True when ``path`` is a skip directory or lies inside one."""
        return any(path.is_relative_to(skip) for skip in skip_dirs)

    for directory in CONFIG["source_directories"]:
        dir_path = Path(directory).resolve()
        if not dir_path.is_dir():
            logger.warning(f"Invalid directory: {directory}")
            continue

        if is_skipped(dir_path):
            continue

        for ext in CONFIG["file_extensions"]:
            for file_path in dir_path.glob(f"**/*{ext}"):
                # The glob pattern also matches directories named like "foo.js".
                if not file_path.is_file() or is_skipped(file_path):
                    continue
                path_str = str(file_path)
                if path_str not in seen:
                    seen.add(path_str)
                    file_paths.append(path_str)

    return file_paths


async def load_progress_data() -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Read the progress and failed-file state from disk.

    Both dictionaries start from built-in defaults; whatever JSON object is
    found in the corresponding file is merged over them. A missing or blank
    file keeps the defaults, and a file that cannot be read or parsed is
    logged as a warning and otherwise ignored.

    Returns:
        ``(progress_data, failed_data)``.
    """
    progress_data: Dict[str, Any] = {"processed_files": [], "total_cost": 0.0}
    failed_data: Dict[str, Any] = {"failed_files": {}}

    state_files = (
        (CONFIG["progress_file"], progress_data),
        (CONFIG["failed_file"], failed_data),
    )
    for file, target in state_files:
        if not os.path.exists(file):
            continue
        try:
            async with aiofiles.open(file, "r") as handle:
                raw_text = await handle.read()
            if raw_text.strip():
                target.update(json.loads(raw_text))
        except Exception as e:
            logger.warning(f"Error loading {file}: {e}")

    return progress_data, failed_data


async def save_progress_data(progress_data: Dict[str, Any], failed_data: Dict[str, Any]) -> None:
    """Persist the progress and failed-file state as JSON.

    Each file is replaced atomically: the JSON is serialised first, written to a
    sibling ``<name>.tmp`` file, and only then swapped into place. An
    interruption or a serialisation error therefore leaves the previous state
    file intact instead of an empty or half-written one.

    Args:
        progress_data: Written to ``CONFIG["progress_file"]``.
        failed_data: Written to ``CONFIG["failed_file"]``.

    Errors are logged per file and not raised.
    """
    for file, data in [(CONFIG["progress_file"], progress_data), (CONFIG["failed_file"], failed_data)]:
        temp_file = f"{file}.tmp"
        try:
            # Serialise before touching the disk so bad data cannot clobber the old file.
            serialized = json.dumps(data, indent=2)
            async with aiofiles.open(temp_file, "w") as f:
                await f.write(serialized)
            os.replace(temp_file, file)
        except Exception as e:
            logger.error(f"Error saving {file}: {e}")


async def process_files(num_files: Optional[int] = None) -> None:
    """Refactor not-yet-processed files and write the results back in place.

    For each file: request a rewrite via ``refactor_code``, add the call's cost
    to the running total, overwrite the file with the returned code, record it
    as processed, and commit modified files in batches when auto-commit is on.
    Progress and failures are saved after every file.

    The cost budget is checked *before* each request, so no paid call is made
    once ``CONFIG["cost_budget"]`` has been reached, and the cost of every call
    is recorded as soon as its usage is known. The call that crosses the budget
    is still written to disk, since it has already been paid for.

    Args:
        num_files: Maximum number of files to process in this run; ``None``
            processes every unprocessed file.
    """
    if not CONFIG["api_key"]:
        logger.error("API key missing")
        return

    file_paths = scan_directories()
    progress_data, failed_data = await load_progress_data()
    total_files = len(file_paths)  # Total files found
    logger.info(f"Total files found in directories: {total_files}")

    # Set lookup keeps this filter linear in the number of files.
    already_processed = set(progress_data["processed_files"])
    unprocessed_files = [f for f in file_paths if f not in already_processed]
    logger.info(f"Unprocessed files before processing: {len(unprocessed_files)}")
    # A falsy num_files (None or 0) selects the full list here; for 0 the guard at
    # the top of the loop below then stops before any file is processed.
    files_to_process = unprocessed_files[:num_files] if num_files else unprocessed_files

    if not files_to_process:
        logger.info("No files to process")
        logger.info(f"Remaining unprocessed files: {len(unprocessed_files)}")
        return

    budget = CONFIG["cost_budget"]
    modified_files: List[str] = []
    processed_count = 0

    async with aiohttp.ClientSession() as session:
        for file_path in files_to_process:
            if num_files is not None and processed_count >= num_files:
                break

            # Stop before spending: checking only after the request would pay for a
            # call whose result is then thrown away.
            if budget > 0 and progress_data["total_cost"] >= budget:
                logger.warning(f"Cost budget exceeded: ${progress_data['total_cost']:.6f}")
                break

            refactored_code, usage = await refactor_code(session, file_path, CONFIG["prompt_template"])

            # Record the spend immediately, whether or not the result turns out usable.
            file_cost = (usage.get("prompt_tokens", 0) * INPUT_COST_PER_TOKEN +
                         usage.get("completion_tokens", 0) * OUTPUT_COST_PER_TOKEN)
            progress_data["total_cost"] += file_cost

            if not refactored_code:
                failed_data["failed_files"][file_path] = {"timestamp": time.time(), "reason": "Refactoring failed"}
                await save_progress_data(progress_data, failed_data)
                continue

            try:
                # Strict UTF-8 read: a file that is not valid UTF-8 raises here and is
                # marked as failed rather than being overwritten.
                async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
                    original = await f.read()
                async with aiofiles.open(file_path, "w", encoding="utf-8") as f:
                    await f.write(refactored_code)

                if original != refactored_code:
                    modified_files.append(file_path)

                progress_data["processed_files"].append(file_path)
                # A success supersedes any failure recorded for this file earlier.
                failed_data["failed_files"].pop(file_path, None)
                processed_count += 1
                logger.info(f"Processed {file_path} - Cost: ${file_cost:.6f}")

                if CONFIG["auto_commit"] and len(modified_files) >= CONFIG["auto_commit_interval"]:
                    await commit_files(modified_files, CONFIG["git_root_directory"])
                    modified_files = []

                await save_progress_data(progress_data, failed_data)
                await asyncio.sleep(CONFIG["request_delay"])
            except Exception as e:
                logger.error(f"Error processing {file_path}: {e}")
                failed_data["failed_files"][file_path] = {"timestamp": time.time(), "reason": str(e)}
                await save_progress_data(progress_data, failed_data)

        # Commit whatever is left over from the last, incomplete batch.
        if CONFIG["auto_commit"] and modified_files:
            await commit_files(modified_files, CONFIG["git_root_directory"])

    # Calculate and log remaining files
    remaining_files = len(unprocessed_files) - processed_count
    logger.info(f"Processed {processed_count} files this run")
    logger.info(f"Total files remaining to process: {remaining_files}")

In [None]:
# Notebook cell: a top-level ``await`` only works where an event loop is already
# running (e.g. IPython/Jupyter). In a plain Python script this call would have
# to be wrapped as ``asyncio.run(process_files(num_files=20))``.
await process_files(num_files=20)

2025-03-10 13:23:04,432 - refactoring - INFO - Total files found in directories: 233
2025-03-10 13:23:04,433 - refactoring - INFO - Unprocessed files before processing: 219
2025-03-10 13:23:05,445 - refactoring - INFO - Processing C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\utils\index.ts (attempt 1)
2025-03-10 13:24:05,325 - refactoring - ERROR - API error 520 for C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\utils\index.ts: <!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js ie6 oldie" lang="en-US"> <![endif]-->
<!--[if IE 7]>    <html class="no-js ie7 oldie" lang="en-US"> <![endif]-->
<!--[if IE 8]>    <html class="no-
2025-03-10 13:24:11,340 - refactoring - INFO - Processing C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\utils\index.ts (attempt 2)
2025-03-10 13:25:11,345 - refactoring - ERROR - API error 520 for C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\utils\index.ts: <!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js ie6 oldie" lang="en

CancelledError: 