In [1]:
import os
import shutil
import asyncio
import aiohttp
import aiofiles
import time
from pathlib import Path
from typing import Set, List, Optional, Dict, Any, Tuple
import json

# File (relative to the working directory) that lists already processed paths,
# one per line.
PROCESSED_FILES_RECORD = "processed_files.txt"

# Catalogue of the chat-completion back ends this notebook can talk to.
#
# SECURITY: API keys are read from environment variables instead of being
# written into the notebook. Keys that were ever stored in a notebook must be
# treated as leaked and rotated with the provider. A variable that is not set
# yields ``None`` for that entry.
#
#   OPENROUTER_API_KEY  -> "OpenRouter"
#   GROQ_API_KEY        -> "Groq"
#   GROQ2_API_KEY       -> "Groq2" (second Groq account)
#   XAI_API_KEY         -> "X.ai"
sources = {
    "OpenRouter": {
        "base_url": "https://openrouter.ai/api/v1",
        "model": "qwen/qwen-2.5-coder-32b-instruct:free",
        "api_key": os.environ.get("OPENROUTER_API_KEY")
    },
    "Groq": {
        "base_url": "https://api.groq.com/openai/v1/",
        "model": "qwen-2.5-coder-32b",
        "api_key": os.environ.get("GROQ_API_KEY")
    },
    "Groq2": {
        "base_url": "https://api.groq.com/openai/v1/",
        "model": "qwen-2.5-coder-32b",
        "api_key": os.environ.get("GROQ2_API_KEY")
    },
    "X.ai": {
        "base_url": "https://api.x.ai/v1",
        "model": "grok-2-latest",
        "api_key": os.environ.get("XAI_API_KEY")
    },
    # Local server: no key is configured for it.
    "Ollama": {
        "base_url": "http://localhost:11434",
        "model": "qwen2.5-coder",
        "api_key": None
    }
}

# Name of the entry in `sources` that every request is sent to.
source = "Groq"

# Run-wide settings. The connection values are copied from the selected source;
# the rest controls file discovery, throttling, sampling and the prompts.
CONFIG = {
    "base_url": sources[source]["base_url"],
    "api_key": sources[source]["api_key"],
    "model": sources[source]["model"],
    # NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
    "source_directories": [r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\workspace"],
    "skip_directories": [
        r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\i18n",
    ],
    "pipeline_mode": "combined",  # "multiple" "single" "combined"
    "selected_pass": "optimize",  # template used when pipeline_mode is "single"
    "requests_per_minute": 20,
    "concurrent_requests": 3,
    "max_context_tokens": 131072,
    "temperature": 0.6,
    "top_p": 0.95,
    "file_extensions": [".py", ".ts", ".svelte"],
    "retry_sleep_seconds": 30,
    # Templates are filled with str.format, which is why literal braces are
    # written doubled ("{{") inside them.
    "prompt_templates": {
        "optimize": (
            "Optimize the {file_type} code while preserving core functionality:\n"
            "- Remove all `i18n` references (e.g., `t()`, `$t`, `i18n.*`)\n"
            "- Think step by step about each change to ensure functionality is preserved\n"
            "- Simplify code and improve performance through careful analysis\n"
            "- Remove unused variables, imports, and dead code\n"
            "- Refactor repeated logic into reusable functions/components\n"
            "- Use descriptive, consistent naming (e.g., `calculateTotal` not `calc`)\n"
            "- Leverage {file_type}-specific features and best practices\n"
            "- Line length: 80-100 characters\n"
            "- Use early returns for clearer control flow\n"
            "- Follow CamelCase for variables/functions, PascalCase for classes/components\n"
            "- No empty lines between imports; one empty line before code\n"
            "- Preserve core logic and `onMount(() => {{`) in .svelte files\n"
            "- Be decisive in your optimizations, choosing the best approach rather than offering alternatives\n\n"
            "Input code:\n{content}\n\n"
            "Return only the modified code without any explanations or additional text."
        ),
        "comment_cleanup": (
            "Process the {file_type} code while preserving functionality:\n"
            "- Remove all `i18n` references (e.g., `t()`, `$t`, `i18n.*`)\n"
            "- Remove all existing comments (e.g., `//`, `#`, `/* */`)\n"
            "- Add clear, concise, and informative documentation:\n"
            "  - For .py files: use `'''Purpose, parameters, returns'''` docstrings in every function and script section\n"
            "  - For .ts/.svelte files: use `/** Purpose, @param, @returns */` JSDoc comments for functions and components\n"
            "  - For HTML and Svelte markup: add comments in main areas to describe the structure and purpose of sections\n"
            "- Ensure every script block and main HTML area includes appropriate comments\n"
            "- Documentation should be precise and helpful without being verbose\n"
            "- Line length: 80-100 characters\n"
            "- Use early returns for clearer code paths\n"
            "- Follow CamelCase for variables/functions, PascalCase for classes/components\n"
            "- No empty lines between imports; one empty line before code\n"
            "- Preserve core logic and `onMount(() => {{`) in .svelte files\n"
            "- Be thorough and attentive to detail\n\n"
            "Input code:\n{content}\n\n"
            "Return only the modified code without any explanations or additional text."
        ),
        "verification": (
            "Verify the modified {file_type} code with careful analysis:\n"
            "- Original: {original_content}\n"
            "- Modified: {modified_content}\n"
            "- Think step by step, comparing sections to ensure functionality preservation\n"
            "- Allow removal of: `i18n` references, unused code, empty import lines\n"
            "- Disallow changes to: core logic, `onMount(() => {{`) in .svelte files\n"
            "- Check that no functionality is missing or altered\n"
            "- Verify that all required imports remain intact\n"
            "- Ensure code standards are consistently applied\n"
            "- Be cautious about changes that might affect behavior\n"
            "- Return the modified code if valid; otherwise, return an empty string\n"
            "- Provide only the final verified code without any explanations or additional text"
        ),
        "combined": (
            "Combine the following two tasks for the {file_type} code while preserving core functionality:\n"
            "1. Optimize the code by removing all `i18n` references, unused variables, imports, dead code, and by refactoring repeated logic. "
            "Use descriptive naming, leverage {file_type}-specific best practices, and maintain clear control flow.\n"
            "2. Clean up comments by removing all existing comments and adding clear, concise, and informative documentation. "
            "For .py files use docstrings; for .ts/.svelte files use JSDoc comments; and for markup add structured comments.\n\n"
            # The blank line after this sentence keeps it from running straight
            # into the "Input code:" header.
            "Remove code relating to: ldap, updating version, oauth, haptic, mobile.\n\n"
            "Input code:\n{content}\n\n"
            "Return only the modified code without any explanations or additional text."
        ),
    },
}


def load_processed_files() -> Set[str]:
    """Return the entries stored in the processed-files record.

    Every non-blank line of ``PROCESSED_FILES_RECORD`` is one entry, with
    surrounding whitespace removed. When the record file does not exist an
    empty set is returned.
    """
    if os.path.exists(PROCESSED_FILES_RECORD):
        with open(PROCESSED_FILES_RECORD, "r", encoding="utf-8") as record:
            entries = (raw_line.strip() for raw_line in record)
            return {entry for entry in entries if entry}
    return set()


def save_processed_files(file_paths: List[str]) -> None:
    """Append the given paths to ``PROCESSED_FILES_RECORD``, one per line."""
    with open(PROCESSED_FILES_RECORD, "a", encoding="utf-8") as record:
        record.writelines(f"{path}\n" for path in file_paths)


class RateLimiter:
    """Serialise request starts so they are at least ``interval`` seconds apart.

    ``interval`` is ``60 / requests_per_minute``. Callers await ``acquire()``
    before sending a request; the internal lock makes waiting callers pass
    through one at a time.
    """

    def __init__(self, requests_per_minute: int):
        """Create a limiter.

        Args:
            requests_per_minute: Maximum number of request starts per minute.

        Raises:
            ValueError: If ``requests_per_minute`` is not positive (zero would
                divide by zero, a negative value would disable the limiter).
        """
        if requests_per_minute <= 0:
            raise ValueError("requests_per_minute must be a positive number")
        self.interval = 60.0 / requests_per_minute
        # Event-loop clock reading taken when the previous caller was released.
        self.last_request_time = 0
        self.lock = asyncio.Lock()

    async def acquire(self):
        """Wait until ``interval`` seconds have passed since the last release."""
        async with self.lock:
            # acquire() only ever runs inside a coroutine, so the running loop
            # is the right clock source.
            loop = asyncio.get_running_loop()
            elapsed = loop.time() - self.last_request_time
            if elapsed < self.interval:
                await asyncio.sleep(self.interval - elapsed)
            self.last_request_time = loop.time()


def _strip_code_fence(text: str) -> str:
    """Return ``text`` without the Markdown code fence wrapping it, if any.

    A fence is removed only when it encloses the whole reply: after trimming
    whitespace the text must start with three backticks (a language tag may
    follow on the same line) and end with three backticks. In that case the
    first and last lines are dropped and the remainder is trimmed again.
    """
    code = text.strip()
    if code.startswith("```") and code.endswith("```"):
        code = "\n".join(code.splitlines()[1:-1]).strip()
    return code


async def refactor_code(session: aiohttp.ClientSession, file_path: str, prompt: str, rate_limiter: RateLimiter, sem: asyncio.Semaphore) -> str:
    """Send ``prompt`` to the configured model and return the code it replies with.

    When the module-level ``source`` is "Ollama" a single request is made to
    the local server's ``/api/chat`` endpoint. Otherwise up to three attempts
    are made against ``<base_url>/chat/completions``.

    Args:
        session: aiohttp session used for the HTTP request.
        file_path: Path of the file being processed; used in log messages only.
        prompt: Fully formatted prompt, sent as a single user message.
        rate_limiter: Awaited before every request to space requests out.
        sem: Semaphore bounding the number of requests in flight.

    Returns:
        The reply with a wrapping Markdown code fence removed, or "" when no
        usable reply was obtained (errors are printed, not raised).
    """
    # Some configured base URLs end with "/"; normalise so the joined URL does
    # not contain "//".
    base_url = CONFIG["base_url"].rstrip("/")
    messages = [{"role": "user", "content": prompt}]

    if source == "Ollama":
        # The local server is reached over HTTP through the shared session, so
        # no extra client package is required and the event loop is not blocked.
        payload = {
            "model": CONFIG["model"],
            "messages": messages,
            "stream": False,  # ask for one JSON document instead of a stream
            "options": {"temperature": CONFIG["temperature"], "top_p": CONFIG["top_p"]},
        }
        try:
            await rate_limiter.acquire()
            async with sem:
                async with session.post(
                    f"{base_url}/api/chat",
                    json=payload,
                    # No total timeout: the original local call did not set one either.
                    timeout=aiohttp.ClientTimeout(total=None)
                ) as response:
                    if response.status != 200:
                        error_text = await response.text()
                        print(f"Ollama processing failed for {file_path}: HTTP {response.status}: {error_text}")
                        return ""
                    result = await response.json()
            return _strip_code_fence(result["message"]["content"])
        except Exception as e:
            print(f"Ollama processing failed for {file_path}: {e}")
            return ""

    headers = {"Content-Type": "application/json"}
    if CONFIG["api_key"]:
        # Only send credentials when a key is configured (avoids "Bearer None").
        headers["Authorization"] = f"Bearer {CONFIG['api_key']}"
    payload = {
        "model": CONFIG["model"],
        "messages": messages,
        "temperature": CONFIG["temperature"],
        "top_p": CONFIG["top_p"],
    }
    max_attempts = 3
    for attempt in range(max_attempts):
        # Seconds to wait before the next attempt; None means retry right away.
        retry_delay = None
        try:
            await rate_limiter.acquire()
            async with sem:
                async with session.post(
                    f"{base_url}/chat/completions",
                    json=payload,
                    headers=headers,
                    timeout=aiohttp.ClientTimeout(total=180)
                ) as response:
                    if response.status == 429:
                        error_text = await response.text()
                        error_info = json.loads(error_text).get("error", {})
                        if error_info.get("type") != "tokens":
                            print(f"Rate limit error {response.status} for {file_path}: {error_text}")
                            return ""
                        retry_after = error_info.get("retry_after", CONFIG["retry_sleep_seconds"])
                        retry_delay = float(retry_after)
                        print(f"Rate limit reached for {file_path}. Retrying after {retry_after} seconds.")
                    elif response.status == 413:  # "Request too large": retrying cannot help
                        error_text = await response.text()
                        print(f"Request too large for {file_path}: {error_text}")
                        return ""
                    elif response.status != 200:
                        error_text = await response.text()
                        print(f"API error {response.status} for {file_path}: {error_text}")
                    else:
                        result = await response.json()
                        if "choices" not in result or not result["choices"]:
                            print(f"Missing 'choices' in API response for {file_path}: {result}")
                        else:
                            return _strip_code_fence(result["choices"][0]["message"]["content"])
        except Exception as e:
            print(f"Attempt {attempt + 1} failed for {file_path}: {e}")
            retry_delay = CONFIG["retry_sleep_seconds"]

        # Back off only after the semaphore slot and the response have been
        # released, and never after the final attempt.
        if retry_delay is not None and attempt < max_attempts - 1:
            await asyncio.sleep(retry_delay)
    print(f"All attempts failed for {file_path}")
    return ""

async def process_single_file(session: aiohttp.ClientSession, file_path: str, rate_limiter: RateLimiter, sem: asyncio.Semaphore) -> bool:
    """Rewrite one file through the configured pipeline.

    Pipeline modes (``CONFIG["pipeline_mode"]``):
    - "multiple": run the "optimize" pass, then the "comment_cleanup" pass.
      Each pass that produced new content is followed by a "verification"
      request; the new content is kept only when verification returns
      non-empty code.
    - "combined": one request using the "combined" template.
    - "single": one request using the template named by
      ``CONFIG["selected_pass"]``.

    Args:
        session: aiohttp session passed through to ``refactor_code``.
        file_path: Path of the file to read and overwrite.
        rate_limiter: Limiter passed through to ``refactor_code``.
        sem: Semaphore passed through to ``refactor_code``.

    Returns:
        True when the file content changed and was written back; False when
        reading or writing failed, the mode or pass is unknown, or nothing
        changed.
    """
    file_ext = Path(file_path).suffix[1:]
    try:
        async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
            original_content = await f.read()
    except Exception as e:
        print(f"Failed to read {file_path}: {e}")
        return False

    templates = CONFIG["prompt_templates"]

    async def run_pass(pass_name: str, **fields: str) -> str:
        """Format the named template with ``fields`` and send it to the model."""
        prompt = templates[pass_name].format(file_type=file_ext, **fields)
        return await refactor_code(session, file_path, prompt, rate_limiter, sem)

    modified_content = original_content
    mode = CONFIG["pipeline_mode"]
    if mode == "multiple":
        for pass_name in ("optimize", "comment_cleanup"):
            candidate = await run_pass(pass_name, content=modified_content)
            if not candidate or candidate == modified_content:
                # Nothing new was produced, so there is nothing to verify.
                continue
            verified = await run_pass(
                "verification",
                original_content=original_content,
                modified_content=candidate,
            )
            if verified:
                modified_content = verified
            else:
                # The verification prompt asks for an empty reply when the
                # change is invalid; a failed request also yields "". Either
                # way the candidate is not trusted and is discarded.
                print(f"Verification did not approve the '{pass_name}' pass for {file_path}; discarding it")

    elif mode == "combined":
        combined_result = await run_pass("combined", content=modified_content)
        if combined_result:
            modified_content = combined_result

    elif mode == "single":
        pass_name = CONFIG["selected_pass"]
        if pass_name not in templates:
            print(f"Selected pass '{pass_name}' not found in prompt templates for {file_path}")
            return False
        # All placeholders are supplied so that any template can be selected;
        # str.format ignores the ones a template does not use.
        single_result = await run_pass(
            pass_name,
            content=modified_content,
            original_content=original_content,
            modified_content=modified_content,
        )
        if single_result:
            modified_content = single_result

    else:
        print(f"Unknown pipeline mode '{mode}'; skipping {file_path}")
        return False

    # Only consider it successfully processed if the content has changed
    if modified_content == original_content:
        print(f"No changes for: {file_path}")
        return False

    try:
        async with aiofiles.open(file_path, "w", encoding="utf-8") as f:
            await f.write(modified_content)
    except Exception as e:
        print(f"Failed to write {file_path}: {e}")
        return False

    print(f"Processed: {file_path}")
    return True


async def main(max_files_to_process: Optional[int] = None):
    """Scan for unprocessed files, back them up and rewrite them concurrently.

    NOTE(review): `main` is defined a second time further down in this cell.
    Python keeps only the later definition, so this copy is shadowed; one of
    the two should be removed.

    Args:
        max_files_to_process: When given, only the first this-many discovered
            files are handled in this run.

    Files whose processing succeeded are appended to the processed-files
    record. The backup directory is removed afterwards unless a file failed
    with an unexpected exception, in which case it is kept for recovery.
    """
    # Create the record if it is missing; an existing record is left untouched.
    if not os.path.exists(PROCESSED_FILES_RECORD):
        open(PROCESSED_FILES_RECORD, "w", encoding="utf-8").close()

    file_paths = scan_directories()
    if max_files_to_process is not None:
        file_paths = file_paths[:max_files_to_process]

    if not file_paths:
        print("No new files to process. Exiting.")
        return

    backup_dir = backup_files(file_paths)
    rate_limiter = RateLimiter(CONFIG["requests_per_minute"])
    sem = asyncio.Semaphore(CONFIG["concurrent_requests"])

    connector = aiohttp.TCPConnector(limit=CONFIG["concurrent_requests"])
    async with aiohttp.ClientSession(connector=connector) as session:
        tasks = [process_single_file(session, fp, rate_limiter, sem) for fp in file_paths]
        # return_exceptions=True: one crashing task must not discard the
        # results of the files that were rewritten successfully.
        results = await asyncio.gather(*tasks, return_exceptions=True)

    processed_files = []
    crashed_files = []
    for fp, outcome in zip(file_paths, results):
        if isinstance(outcome, BaseException):
            print(f"Unexpected error while processing {fp}: {outcome!r}")
            crashed_files.append(fp)
        elif outcome:
            processed_files.append(fp)

    save_processed_files(processed_files)
    print(f"Added {len(processed_files)} files to processed files record.")

    if crashed_files:
        print(f"Keeping backup directory {backup_dir}: {len(crashed_files)} files failed unexpectedly.")
    elif backup_dir and os.path.exists(backup_dir):
        shutil.rmtree(backup_dir)
        print(f"Removed backup directory {backup_dir}")

    print(f"Processing complete. {len(processed_files)} files processed.")


def should_skip_file(file_path: Path, skip_dirs: List[Path]) -> bool:
    """Tell whether ``file_path`` is one of ``skip_dirs`` or lies inside one.

    Both sides are resolved and compared as paths (component by component),
    so a directory that merely shares a name prefix ("lib/i18n" vs.
    "lib/i18n_extra") does not match, while a filesystem or drive root used
    as skip directory matches everything below it.

    Args:
        file_path: Path to test.
        skip_dirs: Directories whose content must be skipped.

    Returns:
        True when the file must be skipped, False otherwise.
    """
    resolved_file = file_path.resolve()
    # The path itself plus every ancestor directory.
    candidates = {resolved_file, *resolved_file.parents}
    return any(Path(skip_dir).resolve() in candidates for skip_dir in skip_dirs)


def scan_directories() -> List[str]:
    """Collect the files that still have to be processed.

    Every directory in ``CONFIG["source_directories"]`` is searched
    recursively for each extension in ``CONFIG["file_extensions"]``. A match
    is dropped when it is not a regular file, lies in a skip directory, is
    already listed in the processed-files record, or was already found
    through another source directory.

    Returns:
        Resolved paths as strings, in discovery order, without duplicates.
    """
    processed_files = load_processed_files()
    skip_dirs = [Path(d).resolve() for d in CONFIG["skip_directories"]]
    file_paths = []
    seen = set()

    for directory in CONFIG["source_directories"]:
        dir_path = Path(directory).resolve()
        if not dir_path.is_dir():
            print(f"Directory not found: {directory}")
            continue
        for ext in CONFIG["file_extensions"]:
            for file_path in dir_path.glob(f"**/*{ext}"):
                file_path = file_path.resolve()
                path_str = str(file_path)
                if path_str in seen or path_str in processed_files:
                    continue
                # The glob also yields directories whose name ends in `ext`.
                if not file_path.is_file() or should_skip_file(file_path, skip_dirs):
                    continue
                seen.add(path_str)
                file_paths.append(path_str)
    print(f"Found {len(file_paths)} new files to process")
    return file_paths


def backup_files(file_paths: List[str]) -> str:
    """Copy the given files into a fresh ``backup_<unix time>`` directory.

    Each file is stored under its resolved absolute path re-rooted inside the
    backup directory, so files that share a name but live in different
    directories do not overwrite each other.

    Args:
        file_paths: Paths of the files to copy.

    Returns:
        The backup directory name, or "" when ``file_paths`` is empty.
    """
    if not file_paths:
        print("No files to backup")
        return ""
    backup_dir = f"backup_{int(time.time())}"
    os.makedirs(backup_dir, exist_ok=True)
    for fp in file_paths:
        source_path = Path(fp).resolve()
        # Drop the anchor ("/" or a drive such as "C:\\") so the remaining
        # components can be appended below the backup directory.
        relative_parts = source_path.parts[1:] if source_path.anchor else source_path.parts
        destination = Path(backup_dir).joinpath(*relative_parts)
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(fp, destination)
    print(f"Backed up {len(file_paths)} files to {backup_dir}")
    return backup_dir


async def main(max_files_to_process: Optional[int] = None):
    """Scan for unprocessed files, back them up and rewrite them concurrently.

    Args:
        max_files_to_process: When given, only the first this-many discovered
            files are handled in this run.

    Files whose processing succeeded are appended to the processed-files
    record. The backup directory is removed afterwards unless a file failed
    with an unexpected exception, in which case it is kept for recovery.
    """
    # Create the record if it is missing; an existing record is left untouched.
    if not os.path.exists(PROCESSED_FILES_RECORD):
        open(PROCESSED_FILES_RECORD, "w", encoding="utf-8").close()

    file_paths = scan_directories()
    if max_files_to_process is not None:
        file_paths = file_paths[:max_files_to_process]

    if not file_paths:
        print("No new files to process. Exiting.")
        return

    backup_dir = backup_files(file_paths)
    rate_limiter = RateLimiter(CONFIG["requests_per_minute"])
    sem = asyncio.Semaphore(CONFIG["concurrent_requests"])

    connector = aiohttp.TCPConnector(limit=CONFIG["concurrent_requests"])
    async with aiohttp.ClientSession(connector=connector) as session:
        tasks = [process_single_file(session, fp, rate_limiter, sem) for fp in file_paths]
        # return_exceptions=True: one crashing task must not discard the
        # results of the files that were rewritten successfully.
        results = await asyncio.gather(*tasks, return_exceptions=True)

    processed_files = []
    crashed_files = []
    for fp, outcome in zip(file_paths, results):
        if isinstance(outcome, BaseException):
            print(f"Unexpected error while processing {fp}: {outcome!r}")
            crashed_files.append(fp)
        elif outcome:
            processed_files.append(fp)

    save_processed_files(processed_files)
    print(f"Added {len(processed_files)} files to processed files record.")

    if crashed_files:
        print(f"Keeping backup directory {backup_dir}: {len(crashed_files)} files failed unexpectedly.")
    elif backup_dir and os.path.exists(backup_dir):
        shutil.rmtree(backup_dir)
        print(f"Removed backup directory {backup_dir}")

    print(f"Processing complete. {len(processed_files)} files processed.")

In [2]:
# Trial run: handle at most 10 of the files found by the scan.
await main(max_files_to_process=10)

Found 26 new files to process
Backed up 10 files to backup_1741893441
Processed: C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\workspace\Knowledge.svelte
Processed: C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\workspace\Prompts.svelte
Processed: C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\workspace\Knowledge\ItemMenu.svelte
Processed: C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\workspace\Tools.svelte
Processed: C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\workspace\common\AccessControlModal.svelte
Rate limit reached for C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\workspace\Models.svelte. Retrying after 30 seconds.
Rate limit reached for C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\workspace\common\AccessControl.svelte. Retrying after 30 seconds.
Processed: C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\components\workspace\common\M

In [None]:
for _ in range(10):  # Outer loop runs 10 times
    for _ in range(3):  # Inner loop runs 3 times
        await main(max_files_to_process=5)
        await asyncio.sleep(60 * 2)  # Sleep for 2 minutes between inner loop iterations
    await asyncio.sleep(60 * 5)  # Sleep for 5 minutes between outer loop iterations