In [7]:
import os
import json
import time
import asyncio
import aiohttp
import aiofiles
import logging
import subprocess
from pathlib import Path
from typing import List, Dict, Any, Tuple, Optional, Set
from functools import lru_cache

# Logging: every record is written both to refactoring.log and to the console stream.
logging.basicConfig(
    handlers=[
        logging.FileHandler("refactoring.log"),
        logging.StreamHandler(),
    ],
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("refactoring")

# Pricing and chunking constants
INPUT_COST_PER_TOKEN = 2.00 / 1_000_000  # USD per prompt token ($2.00 per 1M)
OUTPUT_COST_PER_TOKEN = 10.00 / 1_000_000  # USD per completion token ($10.00 per 1M)
CHUNK_SIZE_LINES = 500  # Lines per chunk when a file over 1000 lines is split
INPUT_TOKEN_FRACTION = 0.9  # Input may use at most 90% of the context window

# Configuration
# SECURITY: the API key is read from the XAI_API_KEY environment variable instead
# of being embedded in the notebook. If the variable is unset the value is "" and
# process_files() stops with "API key missing".
# NOTE(review): a literal key used to live on this line; it must be treated as
# leaked and revoked/rotated with the provider.
CONFIG = {
    "base_url": "https://api.x.ai/v1",
    "api_key": os.environ.get("XAI_API_KEY", ""),
    "progress_file": "refactoring_progress.json",
    "failed_file": "refactoring_failed.json",
    # NOTE(review): machine-specific absolute paths; adjust before running elsewhere.
    "source_directories": [r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src\routes"],
    "skip_directories": [
        r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\i18n",
        r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\channel",
        r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\playground",
        r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\icons",
    ],
    "model": "grok-2-latest",
    "auto_commit": False,
    "auto_commit_interval": 100,
    "requests_per_minute": 60,
    "concurrent_requests": 5,
    "git_root_directory": r"C:\Users\harold.noble\Desktop\RIC",
    "max_context_tokens": 131072,
    "request_delay": 1.0,
    "max_retries": 1,
    "retry_delay": 5,
    "timeout": 120,
    "cost_budget": 3.0,
    "file_extensions": [".py", ".ts", ".js", ".svelte"],
    "prompt_template": """Refactor the provided code according to these guidelines and return only the modified code. This code is from file: {file_path}

- Remove all code related to: Tika, document_intelligence, check_for_version_updates, playground, Chat direction, deepgram, webhook_url, ElevenLabs, ldap, azure, Azure AI Speech, oauth, enable_community_sharing, share_chat, enable_channels, channels, channel_id, deepgram, webhook, proxy, youtube_proxy, non_local_voices, haptic, mobile
- Organize imports: standard library first, then third-party, then local; use explicit imports (no wildcards); remove unused imports
- Add TypeScript types where applicable
- Add documentation:
  * Single-line JSDoc comments (/** Comment */) for simple JavaScript/TypeScript functions or sections
  * Multi-line JSDoc (/** ... */) for JavaScript/TypeScript/Svelte functions with params, returns, etc.
  * Single-line docstrings ('Comment') for simple Python functions or sections
  * Multi-line docstrings (''' ... ''') for Python modules/classes/functions with params, returns, exceptions
  * HTML comments for component sections
- Error handling: add try/catch for async ops, use specific exceptions, log errors, add fallbacks
- UI components: add semantic HTML comments, consistent class naming, maintain functionality
- Code quality: remove dead code/redundancies, use clear variable names, simplify expressions, consistent formatting
- For API error handling, use the handleApiError helper function from '../lib/apis/helpers.ts'
- Requirements:
  * Preserve functionality and original file type
  * Keep exported function names and public API
  * Replace i18n references with direct text (e.g., $i18n.t('text') -> 'text')

Input code:
{content}

Output only the refactored code without any explanations or additional text.""",
}


class RateLimiter:
    """Space out API requests so they start at least ``60 / requests_per_minute`` seconds apart.

    Callers ``await acquire()`` before each request. The wait happens while the
    internal lock is held, so concurrent callers queue up and are released one
    interval apart.

    Attributes:
        requests_per_minute: The configured request rate.
        interval: Minimum number of seconds between two granted requests.
        last_request_time: ``time.monotonic()`` reading of the last grant
            (``-inf`` until the first grant, so the first call never waits).
        lock: ``asyncio.Lock`` serialising ``acquire`` calls.
    """

    def __init__(self, requests_per_minute: int):
        """Create a limiter.

        Args:
            requests_per_minute: Maximum number of requests per minute; must be > 0.

        Raises:
            ValueError: If ``requests_per_minute`` is zero or negative (zero would
                divide by zero, a negative rate would silently disable limiting).
        """
        if requests_per_minute <= 0:
            raise ValueError("requests_per_minute must be greater than zero")
        self.requests_per_minute = requests_per_minute
        self.interval = 60.0 / requests_per_minute
        self.last_request_time = float("-inf")
        self.lock = asyncio.Lock()

    async def acquire(self) -> None:
        """Wait until the next request may be sent, then record the grant time."""
        async with self.lock:
            # Monotonic clock: wall-clock adjustments (NTP, DST, manual changes)
            # must not shorten or stretch the pause between requests.
            remaining = self.interval - (time.monotonic() - self.last_request_time)
            if remaining > 0:
                await asyncio.sleep(remaining)
            self.last_request_time = time.monotonic()


def estimate_tokens(text: str) -> int:
    """Estimate the token count of ``text`` from its length (1 token ≈ 4 characters).

    Deliberately not memoised: ``len()`` is O(1), whereas an ``lru_cache`` keyed
    on the text has to hash the whole string and keeps a reference to every
    cached input (here: entire file contents) alive.

    Args:
        text: The text to measure.

    Returns:
        ``len(text) // 4 + 1`` — always at least 1, even for an empty string.
    """
    return len(text) // 4 + 1


async def split_file_content(content: str, file_path: str) -> List[str]:
    """Return ``content`` as a list of chunks for separate model requests.

    Content of up to 1000 lines is returned untouched as a single-element list.
    Longer content is cut into consecutive groups of ``CHUNK_SIZE_LINES`` lines,
    each group re-joined with ``"\\n"``.

    Args:
        content: Full text of the file.
        file_path: Path of the file; only used in the log message.

    Returns:
        The list of chunks, in file order.
    """
    all_lines = content.splitlines()
    line_count = len(all_lines)

    if line_count > 1000:
        logger.info(f"Splitting {file_path} ({line_count} lines) into chunks")
        return [
            "\n".join(all_lines[start:start + CHUNK_SIZE_LINES])
            for start in range(0, line_count, CHUNK_SIZE_LINES)
        ]

    return [content]


async def _sleep_before_retry(attempt: int) -> None:
    """Sleep for the exponential backoff delay that follows failed attempt ``attempt`` (0-based)."""
    backoff_time = CONFIG["retry_delay"] * (2 ** attempt)
    logger.info(f"Backing off for {backoff_time}s before retry")
    await asyncio.sleep(backoff_time)


async def refactor_code(
    session: aiohttp.ClientSession,
    file_path: str,
    content: str,
    prompt_template: str,
    rate_limiter: RateLimiter
) -> Tuple[str, Dict[str, Any]]:
    """Send one chunk of code to the chat-completions endpoint and return the model's rewrite.

    The prompt is built from ``prompt_template`` (placeholders ``{file_path}`` and
    ``{content}``). The request is skipped when the estimated prompt size exceeds
    ``INPUT_TOKEN_FRACTION`` of the context window or leaves fewer than 1000
    tokens for the answer. HTTP 429/500/502/503/520 responses, timeouts and
    unexpected exceptions are retried up to ``CONFIG["max_retries"]`` times with
    exponential backoff.

    Args:
        session: Open aiohttp session used for the POST request.
        file_path: Path inserted into the prompt and used in log messages.
        content: The code chunk to refactor.
        prompt_template: Template with ``{file_path}`` and ``{content}`` fields.
        rate_limiter: Limiter awaited before every attempt.

    Returns:
        ``(refactored_code, usage)`` on success, where ``usage`` is the response's
        ``"usage"`` mapping (``{}`` if absent). ``("", {})`` on any failure,
        including a completion that was cut off by the token limit.
    """
    formatted_prompt = prompt_template.format(file_path=file_path, content=content)

    # Estimate on the prompt that is actually sent, so the substituted file path
    # is counted as well.
    max_context_tokens = CONFIG["max_context_tokens"]
    max_input_tokens = int(max_context_tokens * INPUT_TOKEN_FRACTION)
    total_input_tokens = estimate_tokens(formatted_prompt)

    # A prompt within max_input_tokens is necessarily below the full context
    # size, so no separate ">= max_context_tokens" check is needed.
    if total_input_tokens > max_input_tokens:
        logger.error(f"Chunk of {file_path} exceeds input limit: {total_input_tokens} > {max_input_tokens}")
        return "", {}

    # Keep a 1000-token safety margin because the token count is only an estimate.
    max_output_tokens = max_context_tokens - total_input_tokens - 1000
    if max_output_tokens < 1000:
        logger.error(f"Insufficient output token budget for {file_path}: {max_output_tokens}")
        return "", {}

    headers = {"Authorization": f"Bearer {CONFIG['api_key']}", "Content-Type": "application/json"}
    payload = {
        "model": CONFIG["model"],
        "messages": [{"role": "user", "content": formatted_prompt}],
        "temperature": 0.7,
        "max_tokens": max_output_tokens
    }

    max_retries = CONFIG["max_retries"]
    for attempt in range(max_retries + 1):
        can_retry = attempt < max_retries
        try:
            await rate_limiter.acquire()
            logger.info(f"Refactoring chunk of {file_path} (attempt {attempt + 1})")
            async with session.post(
                f"{CONFIG['base_url']}/chat/completions",
                json=payload,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=CONFIG["timeout"])
            ) as response:
                if response.status != 200:
                    error_text = await response.text()
                    logger.error(f"API error {response.status} for {file_path}: {error_text[:200]}")
                    if response.status in (429, 500, 502, 503, 520) and can_retry:
                        await _sleep_before_retry(attempt)
                        continue
                    return "", {}

                result = await response.json()
                if "choices" not in result or not result["choices"]:
                    logger.error(f"Invalid API response for {file_path}: {result}")
                    return "", {}

                choice = result["choices"][0]
                # A completion that stopped at max_tokens is incomplete code; the
                # caller writes results straight over the source file, so treat
                # it as a failure. Assumes the OpenAI-style "finish_reason" field;
                # if the field is absent the check is simply skipped.
                if choice.get("finish_reason") == "length":
                    logger.error(f"Truncated completion for {file_path}: output hit the token limit")
                    return "", {}

                # Normalise a null content to "" so the declared str return type holds.
                return choice["message"]["content"] or "", result.get("usage", {})
        except asyncio.TimeoutError:
            logger.warning(f"Timeout for {file_path} (attempt {attempt + 1})")
            if can_retry:
                await _sleep_before_retry(attempt)
        except Exception as e:
            logger.error(f"Unexpected error for {file_path} (attempt {attempt + 1}): {e}")
            if not can_retry:
                return "", {}
            await _sleep_before_retry(attempt)
    return "", {}


async def process_file(
    session: aiohttp.ClientSession,
    file_path: str,
    prompt_template: str,
    rate_limiter: RateLimiter
) -> Tuple[str, float]:
    """Refactor one file through the model and overwrite it when the result differs.

    The file is read as UTF-8 (undecodable bytes are replaced), split into chunks
    by ``split_file_content``, each chunk is sent through ``refactor_code``, and
    the results are joined with newlines. If any chunk fails, nothing is written.

    Args:
        session: Open aiohttp session passed on to ``refactor_code``.
        file_path: Path of the file to refactor.
        prompt_template: Prompt template passed on to ``refactor_code``.
        rate_limiter: Limiter passed on to ``refactor_code``.

    Returns:
        ``(file_path, cost)`` on success, where ``cost`` is the API cost computed
        from the reported token usage — also when the model returned the file
        unchanged, since those tokens were billed too. ``("", 0.0)`` on failure.
    """
    try:
        async with aiofiles.open(file_path, "r", encoding="utf-8", errors="replace") as f:
            original_content = await f.read()

        # Nothing to refactor; do not pay for (or write back) invented content.
        if not original_content.strip():
            logger.info(f"Skipping empty file {file_path}")
            return file_path, 0.0

        chunks = await split_file_content(original_content, file_path)
        refactored_chunks = []
        total_usage = {"prompt_tokens": 0, "completion_tokens": 0}

        # The prompt shows the path relative to the git root. A file outside the
        # root must not abort processing, so fall back to the bare file name.
        git_root = Path(CONFIG["git_root_directory"]).resolve()
        file_path_obj = Path(file_path).resolve()
        try:
            prompt_path = str(file_path_obj.relative_to(git_root))
        except ValueError:
            logger.warning(f"{file_path} is outside the git root; using file name in prompt")
            prompt_path = file_path_obj.name

        for i, chunk in enumerate(chunks):
            refactored_chunk, usage = await refactor_code(
                session,
                prompt_path,
                chunk,
                prompt_template,
                rate_limiter
            )
            if not refactored_chunk:
                logger.error(f"Failed to refactor chunk {i+1} of {file_path}")
                return "", 0.0
            refactored_chunks.append(refactored_chunk)
            total_usage["prompt_tokens"] += usage.get("prompt_tokens", 0)
            total_usage["completion_tokens"] += usage.get("completion_tokens", 0)

        # Reassemble all chunks into a single file before saving
        refactored_code = "\n".join(refactored_chunks)
        file_cost = (total_usage["prompt_tokens"] * INPUT_COST_PER_TOKEN +
                     total_usage["completion_tokens"] * OUTPUT_COST_PER_TOKEN)

        # Save immediately if modified
        if original_content != refactored_code:
            async with aiofiles.open(file_path, "w", encoding="utf-8") as f:
                await f.write(refactored_code)
            logger.info(f"Saved refactored {file_path} - Cost: ${file_cost:.6f}")
        else:
            logger.info(f"No changes for {file_path}")

        # Report the real cost in both cases so the caller's budget tracking
        # includes requests whose answer matched the input. The caller queues
        # every file with cost > 0 for `git add`; staging an unchanged file is
        # a no-op.
        return file_path, file_cost
    except Exception as e:
        logger.error(f"Error processing {file_path}: {e}")
        return "", 0.0


async def commit_files(files: List[str], git_root: str) -> None:
    """Stage the given files and create one git commit for them.

    Does nothing when ``CONFIG["auto_commit"]`` is false or ``files`` is empty.
    Git failures are logged, never raised.

    Args:
        files: Paths of the files to stage.
        git_root: Directory in which the git commands are executed.
    """
    if not CONFIG["auto_commit"] or not files:
        return
    try:
        # Run git with cwd=git_root instead of os.chdir(): changing the
        # process-wide working directory would redirect every relative path used
        # afterwards (progress file, failed file, log file).
        relative_files = [os.path.relpath(f, git_root) for f in files]
        # "--" ends option parsing so a path starting with "-" is not read as a flag.
        subprocess.run(
            ["git", "add", "--"] + relative_files,
            cwd=git_root, check=True, capture_output=True, text=True
        )
        commit_message = f"Refactor {len(files)} files using AI"
        result = subprocess.run(
            ["git", "commit", "-m", commit_message],
            cwd=git_root, check=True, capture_output=True, text=True
        )
        logger.info(f"Committed {len(files)} files: {result.stdout}")
    except subprocess.CalledProcessError as e:
        logger.error(f"Git commit failed: {e.stderr}")
    except Exception as e:
        logger.error(f"Git commit error: {str(e)}")


def scan_directories() -> List[str]:
    """Collect the files to refactor from ``CONFIG["source_directories"]``.

    Every source directory is searched recursively for each extension in
    ``CONFIG["file_extensions"]``. Anything located inside one of
    ``CONFIG["skip_directories"]`` is ignored, as are non-files and duplicates.

    The result is computed on every call (no caching), so edits to ``CONFIG`` or
    to the file system between notebook runs are picked up, and each caller gets
    its own list.

    Returns:
        Paths of the matching files as strings, in discovery order.
    """
    file_paths: List[str] = []
    seen: Set[str] = set()
    skip_dirs = [Path(d).resolve() for d in CONFIG["skip_directories"]]

    def is_skipped(path: Path) -> bool:
        # is_relative_to() is also true for the skip directory itself.
        return any(path.is_relative_to(skip) for skip in skip_dirs)

    for directory in CONFIG["source_directories"]:
        dir_path = Path(directory).resolve()
        if not dir_path.is_dir():
            logger.warning(f"Invalid directory: {directory}")
            continue

        if is_skipped(dir_path):
            continue

        for ext in CONFIG["file_extensions"]:
            for file_path in dir_path.glob(f"**/*{ext}"):
                # The glob also matches directories whose name ends in the extension.
                if not file_path.is_file() or is_skipped(file_path):
                    continue
                path_str = str(file_path)
                # Overlapping source directories or extensions must not queue a file twice.
                if path_str not in seen:
                    seen.add(path_str)
                    file_paths.append(path_str)

    logger.info(f"Found {len(file_paths)} files to process")
    return file_paths


async def load_progress_data() -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Read the progress and failed-file records from disk.

    Both records start from defaults and are updated with whatever the JSON
    files named by ``CONFIG["progress_file"]`` and ``CONFIG["failed_file"]``
    contain. A missing or blank file leaves the defaults in place. A file with
    invalid JSON is renamed to ``<name>.bak-<unix time>`` and its defaults kept.

    Returns:
        ``(progress_data, failed_data)``.
    """
    progress_data = {"processed_files": [], "total_cost": 0.0}
    failed_data = {"failed_files": {}}
    sources = (
        (CONFIG["progress_file"], progress_data),
        (CONFIG["failed_file"], failed_data),
    )

    for path, record in sources:
        if not os.path.exists(path):
            continue

        try:
            async with aiofiles.open(path, "r") as handle:
                raw = await handle.read()
                if raw.strip():
                    record.update(json.loads(raw))
        except json.JSONDecodeError as e:
            logger.warning(f"JSON decode error in {path}: {e}")
            # Move the unreadable file aside so it can be inspected later.
            backup_file = f"{path}.bak-{int(time.time())}"
            try:
                os.rename(path, backup_file)
                logger.info(f"Created backup of corrupted file: {backup_file}")
            except Exception as rename_err:
                logger.error(f"Failed to backup corrupted file: {rename_err}")
        except Exception as e:
            logger.warning(f"Error loading {path}: {e}")

    return progress_data, failed_data


async def save_progress_data(progress_data: Dict[str, Any], failed_data: Dict[str, Any]) -> None:
    """Write the progress and failed-file records to their JSON files.

    Each record is first written to ``<file>.tmp`` and then moved over the real
    file with ``os.replace``. Errors are logged per file and do not propagate.

    Args:
        progress_data: Record stored in ``CONFIG["progress_file"]``.
        failed_data: Record stored in ``CONFIG["failed_file"]``.
    """
    targets = (
        (CONFIG["progress_file"], progress_data),
        (CONFIG["failed_file"], failed_data),
    )

    for destination, record in targets:
        try:
            staging_path = f"{destination}.tmp"
            async with aiofiles.open(staging_path, "w") as handle:
                await handle.write(json.dumps(record, indent=2))

            # Swap the finished temp file into place.
            os.replace(staging_path, destination)
        except Exception as e:
            logger.error(f"Error saving {destination}: {e}")


async def process_files(num_files: Optional[int] = None) -> None:
    """Refactor the not-yet-processed files concurrently and record progress.

    Files returned by ``scan_directories`` that are not listed in the progress
    record are processed with at most ``CONFIG["concurrent_requests"]`` running
    at once. Progress is saved every 10 files and at the end. Once the
    accumulated cost exceeds ``CONFIG["cost_budget"]`` (when > 0), files that
    have not started yet are skipped; files already in flight are finished and
    recorded, because they have been paid for and written to disk.

    Args:
        num_files: Maximum number of files to process in this run. ``None``
            means all unprocessed files; ``0`` means none.
    """
    start_time = time.time()

    if not CONFIG["api_key"]:
        logger.error("API key missing")
        return

    file_paths = scan_directories()
    progress_data, failed_data = await load_progress_data()

    # Convert to set for faster lookups
    processed_set = set(progress_data["processed_files"])
    unprocessed_files = [f for f in file_paths if f not in processed_set]
    # Compare against None explicitly: a plain truthiness test would treat
    # num_files=0 as "no limit".
    if num_files is None:
        files_to_process = unprocessed_files
    else:
        files_to_process = unprocessed_files[:max(num_files, 0)]

    logger.info(f"Files to process: {len(files_to_process)} of {len(file_paths)} total")

    if not files_to_process:
        logger.info("No files to process")
        return

    cost_budget = CONFIG["cost_budget"]
    if cost_budget > 0 and progress_data["total_cost"] >= cost_budget:
        logger.warning(f"Cost budget already used up: ${progress_data['total_cost']:.6f} >= ${cost_budget}")
        return

    # Create shared resources
    rate_limiter = RateLimiter(CONFIG["requests_per_minute"])
    semaphore = asyncio.Semaphore(CONFIG["concurrent_requests"])
    modified_files = []
    commit_lock = asyncio.Lock()
    processed_count = 0
    success_count = 0
    skipped_count = 0
    budget_exceeded = False

    # Progress tracking
    total_files = len(files_to_process)

    async def process_with_semaphore(session: aiohttp.ClientSession, file_path: str, file_index: int) -> None:
        nonlocal processed_count, success_count, skipped_count, budget_exceeded

        async with semaphore:
            # Stop spending as soon as the budget is gone; the file stays
            # unprocessed and is picked up by a later run.
            if budget_exceeded:
                skipped_count += 1
                return

            logger.info(f"Processing file {file_index+1}/{total_files}: {file_path}")
            result_file, file_cost = await process_file(session, file_path, CONFIG["prompt_template"], rate_limiter)

            processed_count += 1

            if not result_file:
                logger.error(f"Failed to process {file_path}")
                failed_data["failed_files"][file_path] = {"timestamp": time.time(), "reason": "Processing failed"}
            else:
                success_count += 1
                # Always record a finished file, even if it pushes the total over
                # budget: it has already been rewritten on disk, and leaving it
                # out would make the next run refactor it a second time.
                progress_data["processed_files"].append(result_file)
                progress_data["total_cost"] += file_cost

                if file_cost > 0:  # Indicates modification
                    async with commit_lock:
                        modified_files.append(result_file)
                        if CONFIG["auto_commit"] and len(modified_files) >= CONFIG["auto_commit_interval"]:
                            files_to_commit = modified_files.copy()
                            modified_files.clear()
                            await commit_files(files_to_commit, CONFIG["git_root_directory"])

                if cost_budget > 0 and progress_data["total_cost"] > cost_budget and not budget_exceeded:
                    budget_exceeded = True
                    logger.warning(f"Cost budget exceeded: ${progress_data['total_cost']:.6f} > ${cost_budget}")

            # Save progress periodically
            if processed_count % 10 == 0 or processed_count == total_files:
                await save_progress_data(progress_data, failed_data)

            # Log progress
            progress_pct = (processed_count / total_files) * 100
            logger.info(f"Progress: {processed_count}/{total_files} ({progress_pct:.1f}%) - Success: {success_count}")

    timeout_config = aiohttp.ClientTimeout(total=CONFIG["timeout"] * 2)  # Double timeout for session
    connector = aiohttp.TCPConnector(limit_per_host=CONFIG["concurrent_requests"])

    async with aiohttp.ClientSession(timeout=timeout_config, connector=connector) as session:
        tasks = [process_with_semaphore(session, file_path, i) for i, file_path in enumerate(files_to_process)]

        try:
            await asyncio.gather(*tasks)
        except Exception as e:
            logger.error(f"Error during processing: {e}")
        finally:
            # Final commit for any remaining files
            if CONFIG["auto_commit"] and modified_files:
                await commit_files(modified_files, CONFIG["git_root_directory"])

            # Final save of progress data
            await save_progress_data(progress_data, failed_data)

    end_time = time.time()
    duration = end_time - start_time

    # Compute the percentage outside the f-string so the zero-division guard
    # actually executes instead of being printed as literal text.
    success_pct = (success_count / processed_count * 100) if processed_count else 0.0

    logger.info(f"Run complete! Processed {processed_count} files in {duration:.1f} seconds")
    logger.info(f"Success rate: {success_count}/{processed_count} ({success_pct:.1f}%)")
    if skipped_count:
        logger.info(f"Skipped {skipped_count} files after the cost budget was exceeded")
    logger.info(f"Total cost: ${progress_data['total_cost']:.6f}")
    # Failed files are not added to the progress record, so only successes
    # reduce the number of files still to do.
    logger.info(f"Remaining files: {len(unprocessed_files) - success_count}")

In [9]:
# Run one batch: refactor at most 34 of the files not yet listed in the progress record.
await process_files(num_files=34)

2025-03-11 10:05:21,515 - refactoring - INFO - Files to process: 34 of 35 total
2025-03-11 10:05:21,516 - refactoring - INFO - Processing file 1/34: C:\Users\harold.noble\Desktop\RIC\app\frontend\src\routes\+error.svelte
2025-03-11 10:05:21,516 - refactoring - INFO - Processing file 2/34: C:\Users\harold.noble\Desktop\RIC\app\frontend\src\routes\+layout.svelte
2025-03-11 10:05:21,517 - refactoring - INFO - Processing file 3/34: C:\Users\harold.noble\Desktop\RIC\app\frontend\src\routes\(app)\+layout.svelte
2025-03-11 10:05:21,517 - refactoring - INFO - Processing file 4/34: C:\Users\harold.noble\Desktop\RIC\app\frontend\src\routes\(app)\+page.svelte
2025-03-11 10:05:21,518 - refactoring - INFO - Processing file 5/34: C:\Users\harold.noble\Desktop\RIC\app\frontend\src\routes\auth\+page.svelte
2025-03-11 10:05:21,520 - refactoring - INFO - Refactoring chunk of app\frontend\src\routes\+error.svelte (attempt 1)
2025-03-11 10:05:22,530 - refactoring - INFO - Refactoring chunk of app\frontend