In [8]:
#!/usr/bin/env python3
"""
Script to refactor Python, Svelte, and TypeScript files using LLM streaming.
Processes files, tracks progress, and overwrites originals with refactored code.
Skips specified subfolders and logs problematic files to 'problematic_files.txt'.
"""

import ast
import os
import re
import time
from typing import Iterable, List, Optional, Set

import chardet
from groq import Groq
from tqdm import tqdm

# --- Configuration ---------------------------------------------------------
# The API key is a secret and must not live in source control, so it is read
# from the environment. When the variable is unset the value is "", which
# fetch_llm_response rejects with "GROQ_API_KEY must be set".
# NOTE(review): the key that used to be hard-coded here should be revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")

# Root directory that is scanned for files to refactor.
DEFAULT_DIRECTORY = r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src"

# Tokens that exclude a folder from the scan.
SKIP_FOLDERS = frozenset({".github", "code_helper", "ollama-0"})

# Text files that persist state between runs.
PROCESSED_FILES_TRACKER = "processed_files.txt"
PROBLEMATIC_FILES_LOG = "problematic_files.txt"

# Maximum number of files handled by a single invocation of main().
FILES_PER_RUN = 50

# Request parameters forwarded to the Groq chat-completions call.
LLM_MODEL = "llama-3.3-70b-versatile"
LLM_TEMPERATURE = 0.5
LLM_TOP_P = 0.9
TIMEOUT_SECONDS = 180
MAX_RETRIES = 3

# Used only as a size guard: prompts longer than MAX_TOKENS * 4 characters
# are rejected before being sent.
MAX_TOKENS = 6000

# System message sent with every request; it is the default value of
# fetch_llm_response's `system_prompt` parameter.
# NOTE(review): the text names only Python, Svelte and TypeScript, while
# get_file_type can also return "JavaScript" - confirm whether JavaScript
# should be mentioned here.
SYSTEM_PROMPT = """
You are an expert developer specializing in code refactoring, created to assist with optimizing code scripts. Your role is to refactor entire scripts while preserving their original functionality, function names, and file type (Python, Svelte, or TypeScript).

IMPORTANT: Your response must contain ONLY the refactored code with no explanations, comments about changes, or formatting markers. Do not include markdown code blocks, explanations, or anything else that is not part of the actual code. Maintain the original file type (Python, Svelte, or TypeScript).
"""

# User-message template. process_single_file fills the {file_type} and
# {code_content} placeholders with str.format, so any other literal brace
# added to this text would have to be doubled.
# NOTE(review): the guideline numbering jumps from 3 to 5 and "playground" is
# listed twice - confirm whether a guideline was dropped by accident.
# NOTE(review): the prompt embeds a fixed date; confirm it is still wanted.
REFACTOR_PROMPT = """
Refactor the {file_type} code I provide according to the following guidelines:
1. Remove Components: Delete all code and derivatives related to: Gemini, ComfyUI, Tika, document intelligence, checkForVersionUpdates, playground, WebhookUrl, LDAP, OAuth, enable_community_sharing, ShareChat, ENABLE_CHANNELS, channels, channel_id, playground, Deepgram, Webhook, Proxy, Youtube Proxy, nonLocalVoices, haptic, mobile, and all i18n-related code and references.
2. Optimize Performance: Simplify logic flows and remove redundancies and inefficiencies. Streamline operations without sacrificing readability.
3. Clean Codebase: Eliminate unused imports and dead code.
5. Strengthen Error Handling: Use specific exceptions (e.g., ValueError, IOError) where applicable. Provide clear and actionable error messages.
6. Enhance Documentation: Add docstrings for modules, functions, and classes in Python, or JSDoc-style comments for TypeScript/Svelte. Keep inline comments for notes that cannot be included in documentation.
Constraints: Preserve existing function names and file type (Python, Svelte, or TypeScript). Return only the refactored code, without explanations or summaries. Include documentation appropriate to the file type. Refactor solely based on the code provided. Do not change exported functions or variable names. The current date is March 07, 2025. Process only the single script I provide.

Code:
{code_content}
"""


def get_file_type(file_path: str) -> str:
    """
    Map a file path to the language label used in the refactor prompt.

    Args:
        file_path: Path (or bare name) of the file; matching is done on the
            suffix and is case-sensitive.

    Returns:
        "Python", "Svelte", "TypeScript" or "JavaScript", or "Unknown" when
        no known suffix matches.
    """
    # Checked in order; each entry pairs the accepted suffixes with a label.
    suffix_labels = (
        ((".py",), "Python"),
        ((".svelte",), "Svelte"),
        ((".ts", ".tsx"), "TypeScript"),
        ((".js",), "JavaScript"),
    )
    return next(
        (label for suffixes, label in suffix_labels if file_path.endswith(suffixes)),
        "Unknown",
    )


def fetch_llm_response(
    prompt: str,
    system_prompt: str = SYSTEM_PROMPT,
    model: str = LLM_MODEL,
    temperature: float = LLM_TEMPERATURE,
    top_p: float = LLM_TOP_P,
    retries: int = MAX_RETRIES,
) -> str:
    """
    Fetch refactored code from the Groq API.

    The request is sent up to ``retries`` times; after a failed attempt the
    function sleeps ``2 ** attempt`` seconds before trying again.

    Args:
        prompt: Prompt with code to refactor.
        system_prompt: Instructions for the LLM.
        model: LLM model to use.
        temperature: Sampling temperature.
        top_p: Top-p sampling parameter.
        retries: Total number of attempts; must be at least 1.

    Returns:
        Refactored code string, stripped of surrounding whitespace.

    Raises:
        ValueError: If prompt, API key, or retries is invalid.
        RuntimeError: If every attempt fails; the last underlying error is
            attached as ``__cause__``.
    """
    if not prompt or not isinstance(prompt, str):
        raise ValueError("Prompt must be a non-empty string")
    if not GROQ_API_KEY:
        raise ValueError("GROQ_API_KEY must be set")
    if retries < 1:
        # With zero attempts the loop below would never run and the function
        # would silently return None despite the declared ``str`` return.
        raise ValueError("retries must be at least 1")

    client = Groq(api_key=GROQ_API_KEY)
    last_error = None
    for attempt in range(retries):
        try:
            response = client.chat.completions.create(
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt},
                ],
                model=model,
                temperature=temperature,
                top_p=top_p,
                stream=False,
                timeout=TIMEOUT_SECONDS,
            )
            content = response.choices[0].message.content
            if content is None:
                # Report a missing body explicitly rather than through an
                # AttributeError raised by None.strip().
                raise RuntimeError("LLM returned no message content")
            return content.strip()
        except Exception as e:
            # Deliberately broad: this is the retry boundary and every
            # failure is treated the same way here.
            last_error = e
            if attempt < retries - 1:
                time.sleep(2 ** attempt)

    raise RuntimeError(
        f"API failed after {retries} retries: {str(last_error)}"
    ) from last_error


def load_processed_files(tracker_file: str) -> Set[str]:
    """
    Load processed file paths from tracker file.

    If the tracker does not exist yet it is created with a comment header.
    Blank lines and comment lines (first non-blank character is ``#``) are
    ignored; all other lines are returned with surrounding whitespace
    removed.

    Args:
        tracker_file: Path to tracker file.

    Returns:
        Set of processed file paths (empty when the file was just created or
        could not be read; read errors are reported via ``tqdm.write``).
    """
    processed: Set[str] = set()
    try:
        if not os.path.exists(tracker_file):
            with open(tracker_file, "w", encoding="utf-8") as f:
                f.write("# Processed Files\n# Tracks refactored files\n\n")
        else:
            with open(tracker_file, "r", encoding="utf-8") as f:
                for raw_line in f:
                    entry = raw_line.strip()
                    # Test the stripped text so an indented "# ..." line is
                    # treated as a comment rather than loaded as a path.
                    if entry and not entry.startswith("#"):
                        processed.add(entry)
    except IOError as e:
        tqdm.write(f"Failed to load {tracker_file}: {str(e)}")
    return processed


def save_processed_files(tracker_file: str, processed: Set[str]) -> None:
    """
    Rewrite the tracker file with the given set of processed paths.

    The file is overwritten: a two-line comment header and a blank line,
    followed by one path per line in sorted order. I/O failures are reported
    through ``tqdm.write`` and not raised.

    Args:
        tracker_file: Path to tracker file.
        processed: Set of processed file paths.
    """
    header = "# Processed Files\n# Tracks refactored files\n\n"
    try:
        with open(tracker_file, "w", encoding="utf-8") as handle:
            handle.write(header)
            for entry in sorted(processed):
                handle.write(f"{entry}\n")
    except IOError as err:
        tqdm.write(f"Failed to save {tracker_file}: {str(err)}")

def load_problematic_files(log_file: str) -> Set[str]:
    """
    Load problematic file paths from log file.

    If the log does not exist yet it is created with a comment header. Blank
    lines and comment lines (first non-blank character is ``#``) are
    ignored; all other lines are returned with surrounding whitespace
    removed.

    Args:
        log_file: Path to problematic files log.

    Returns:
        Set of problematic file paths (empty when the file was just created
        or could not be read; read errors are reported via ``tqdm.write``).
    """
    problematic: Set[str] = set()
    try:
        if not os.path.exists(log_file):
            with open(log_file, "w", encoding="utf-8") as f:
                f.write("# Problematic Files\n# Tracks files that failed refactoring\n\n")
        else:
            with open(log_file, "r", encoding="utf-8") as f:
                for raw_line in f:
                    entry = raw_line.strip()
                    # Test the stripped text so an indented "# ..." line is
                    # treated as a comment rather than loaded as a path.
                    if entry and not entry.startswith("#"):
                        problematic.add(entry)
    except IOError as e:
        tqdm.write(f"Failed to load {log_file}: {str(e)}")
    return problematic


def save_problematic_files(log_file: str, problematic: Set[str]) -> None:
    """
    Rewrite the problematic-files log with the given set of paths.

    The file is overwritten: a two-line comment header and a blank line,
    followed by one path per line in sorted order. I/O failures are reported
    through ``tqdm.write`` and not raised.

    Args:
        log_file: Path to problematic files log.
        problematic: Set of problematic file paths.
    """
    header = "# Problematic Files\n# Tracks files that failed refactoring\n\n"
    try:
        with open(log_file, "w", encoding="utf-8") as handle:
            handle.write(header)
            for entry in sorted(problematic):
                handle.write(f"{entry}\n")
    except IOError as err:
        tqdm.write(f"Failed to save {log_file}: {str(err)}")

def is_valid_code(file_path: str, code: str) -> bool:
    """
    Validate code syntax by file type.

    Python sources (``.py``) must parse with ``ast.parse``. Every other file
    type is only checked for being non-blank, because no parser for
    Svelte/TypeScript/JavaScript is available here.

    Args:
        file_path: Path to the file.
        code: Code content to validate.

    Returns:
        True if the code passes the check for its file type, False
        otherwise.
    """
    if not code.strip():
        return False
    if not file_path.endswith(".py"):
        # For Svelte/TS/JS, non-empty content is the only validation.
        return True
    try:
        ast.parse(code)
    except (SyntaxError, ValueError):
        # ValueError covers sources containing NUL bytes, which some Python
        # versions reject with ValueError instead of SyntaxError.
        return False
    return True


def process_single_file(file_path: str, problematic_files: Set[str]) -> bool:
    """
    Refactor a single file and update it if successful.

    The file is decoded with the encoding guessed by ``chardet`` (falling
    back to latin1), sent to the LLM, validated, and then overwritten as
    UTF-8. Expected failures are reported through ``tqdm.write`` and recorded
    in ``problematic_files`` instead of being raised.

    Args:
        file_path: Path to file to refactor.
        problematic_files: Set of files that failed processing; mutated in
            place when this file fails.

    Returns:
        True if refactored and saved, False otherwise (including when the
        LLM output is identical to the original content).
    """
    try:
        with open(file_path, "rb") as f:
            raw_content = f.read()
        encoding = chardet.detect(raw_content)["encoding"] or "latin1"
        with open(file_path, "r", encoding=encoding) as f:
            original_content = f.read()

        file_type = get_file_type(file_path)
        if file_type == "Unknown":
            problematic_files.add(file_path)
            tqdm.write(f"Unknown file type: {file_path}")
            return False

        prompt = REFACTOR_PROMPT.format(file_type=file_type, code_content=original_content)

        # Size guard before calling the LLM: the limit is expressed in
        # characters as MAX_TOKENS * 4.
        if len(prompt) > MAX_TOKENS * 4:
            problematic_files.add(file_path)
            tqdm.write(f"File exceeds token limit: {file_path}")
            return False

        refactored_content = fetch_llm_response(prompt)

        if not is_valid_code(file_path, refactored_content):
            problematic_files.add(file_path)
            tqdm.write(f"Invalid refactored code for {file_path}")
            return False

        # Nothing to write when the model returned the input unchanged.
        if refactored_content.strip() == original_content.strip():
            return False

        with open(file_path, "w", encoding="utf-8") as f:
            f.write(refactored_content.rstrip() + "\n")
        return True

    except (UnicodeError, LookupError) as e:
        # The detected encoding may be wrong (decode error) or unknown to
        # Python (LookupError); neither is an IOError, so without this
        # handler one undecodable file would abort the whole batch.
        tqdm.write(f"Encoding error with {file_path}: {str(e)}")
        problematic_files.add(file_path)
        return False
    except IOError as e:
        tqdm.write(f"IO error with {file_path}: {str(e)}")
        problematic_files.add(file_path)
        return False
    except RuntimeError as e:
        tqdm.write(f"LLM error with {file_path}: {str(e)}")
        problematic_files.add(file_path)
        return False


def get_files_to_process(
    directory: str,
    skip_folders: Optional[Iterable[str]] = None,
) -> List[str]:
    """
    Retrieve list of files to process from directory.

    A subfolder is skipped, together with everything below it, when its own
    name contains one of the skip tokens. Only folder names below
    ``directory`` are tested, so a token that happens to occur in the path
    leading up to ``directory`` no longer excludes every file. Folders and
    files are visited in sorted order, so the result is deterministic.

    Args:
        directory: Root directory to scan.
        skip_folders: Tokens that exclude a subfolder; defaults to the
            module-level ``SKIP_FOLDERS``.

    Returns:
        List of paths of files ending in .py, .svelte, .ts, .tsx or .js.
    """
    extensions = (".py", ".svelte", ".ts", ".tsx", ".js")
    tokens = tuple(SKIP_FOLDERS if skip_folders is None else skip_folders)

    matches: List[str] = []
    for root, dirs, files in os.walk(directory):
        # Assigning to dirs[:] prunes the walk in place, so skipped trees are
        # never descended into.
        dirs[:] = sorted(
            name for name in dirs
            if not any(token in name for token in tokens)
        )
        matches.extend(
            os.path.join(root, name)
            for name in sorted(files)
            if name.endswith(extensions)
        )
    return matches


def main() -> None:
    """
    Orchestrate file refactoring with progress tracking.

    Loads the processed/problematic state, picks up to ``FILES_PER_RUN``
    unprocessed files and refactors them one by one. State files are saved
    after each file, so an interrupted run keeps the progress made so far.
    """
    if not os.path.isdir(DEFAULT_DIRECTORY):
        print(f"Directory not found: {DEFAULT_DIRECTORY}")
        return

    processed_files = load_processed_files(PROCESSED_FILES_TRACKER)
    problematic_files = load_problematic_files(PROBLEMATIC_FILES_LOG)
    all_files = get_files_to_process(DEFAULT_DIRECTORY)

    if not all_files:
        print(f"No files found in {DEFAULT_DIRECTORY}")
        return

    files_to_process = [f for f in all_files if f not in processed_files]
    if not files_to_process:
        print("All files processed")
        return

    # Files that failed in an earlier run are retried last. The sort is
    # stable and False orders before True, so never-tried files keep their
    # order and go first; otherwise files that always fail (e.g. too large)
    # would fill every batch and block progress.
    files_to_process.sort(key=lambda path: path in problematic_files)

    batch = files_to_process[:FILES_PER_RUN]
    with tqdm(total=len(batch), desc="Refactoring", unit="file") as pbar:
        for file_path in batch:
            previously_failed = file_path in problematic_files
            if process_single_file(file_path, problematic_files):
                processed_files.add(file_path)
                save_processed_files(PROCESSED_FILES_TRACKER, processed_files)
                if previously_failed:
                    # The retry succeeded, so drop the stale log entry.
                    problematic_files.discard(file_path)
                    save_problematic_files(PROBLEMATIC_FILES_LOG, problematic_files)
            elif file_path in problematic_files:
                save_problematic_files(PROBLEMATIC_FILES_LOG, problematic_files)
            pbar.update(1)


if __name__ == "__main__":
    main()

Refactoring:   0%|          | 0/50 [00:00<?, ?file/s]

File exceeds token limit: C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\apis\index.ts


Refactoring:  62%|██████▏   | 31/50 [07:18<03:40, 11.63s/file]

LLM error with C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\admin\Evaluations\Leaderboard.svelte: API failed after 3 retries: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01j1qez3f7fj98shsdn8gf3qsm` service tier `on_demand` on tokens per day (TPD): Limit 100000, Used 102416, Requested 3379. Please try again in 1h23m27.285999999s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


Refactoring:  64%|██████▍   | 32/50 [07:22<02:47,  9.32s/file]

LLM error with C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\admin\Functions\FunctionEditor.svelte: API failed after 3 retries: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01j1qez3f7fj98shsdn8gf3qsm` service tier `on_demand` on tokens per day (TPD): Limit 100000, Used 102411, Requested 3903. Please try again in 1h30m56.079s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}


Refactoring:  66%|██████▌   | 33/50 [07:31<03:52, 13.70s/file]


KeyboardInterrupt: 