In [1]:

import os
import shutil
import asyncio
import aiohttp
import aiofiles
import time
from pathlib import Path
from typing import Set, List, Optional, Dict
import re

# File (relative to the working directory) that records which paths have
# already been rewritten, one path per line.
PROCESSED_FILES_RECORD = "processed_files.txt"

# Provider registry: endpoint, model name and credential per backend.
# SECURITY: API keys are read from environment variables rather than being
# embedded in the notebook. Keys that were previously committed in this file
# must be treated as leaked and revoked/rotated with the provider.
# A missing variable yields None for that provider's "api_key".
sources = {
    "OpenRouter": {
        "base_url": "https://openrouter.ai/api/v1",
        "model": "qwen/qwen-2.5-coder-32b-instruct:free",
        "api_key": os.environ.get("OPENROUTER_API_KEY")
    },
    "Groq": {
        "base_url": "https://api.groq.com/openai/v1/",
        "model": "qwen-2.5-coder-32b",
        "api_key": os.environ.get("GROQ_API_KEY")
    },
    "Groq2": {
        "base_url": "https://api.groq.com/openai/v1/",
        "model": "qwen-2.5-coder-32b",
        "api_key": os.environ.get("GROQ_API_KEY_2")
    },
    "X.ai": {
        "base_url": "https://api.x.ai/v1",
        "model": "grok-2-latest",
        "api_key": os.environ.get("XAI_API_KEY")
    },
    "Ollama": {
        "base_url": "http://localhost:11434",
        "model": "qwen2.5-coder",
        # Local server: no credential configured.
        "api_key": None
    }
}

# Set the selected source (must be a key of `sources`)
source = "X.ai"

# Base configuration
# Shared settings copied into every entry of CONFIGS; the provider-specific
# keys ("base_url", "api_key", "model") are added on top of these.
BASE_CONFIG = {
    # Root folders scanned recursively for candidate files.
    # NOTE(review): machine-specific absolute path; adjust before running
    # on another machine.
    "source_directories": [r"C:\Users\harold.noble\Desktop\RIC\app\backend"],
    # Files located under any of these directories are excluded from the scan.
    "skip_directories": [],
    # Key into "prompt_templates" selecting which prompt is sent.
    "selected_pass": "python combined",
    # Used to derive the minimum spacing between API requests.
    "requests_per_minute": 5,
    # Size of the semaphore that bounds in-flight requests.
    "concurrent_requests": 3,
    # Chunk budget; converted to characters at roughly 4 characters per token.
    "max_context_tokens": 5000,
    # When True, a chunk whose estimated token count exceeds
    # "max_context_tokens" is kept unmodified instead of being sent.
    "skip_long_files": False,
    # Sampling parameters forwarded in the request payload.
    "temperature": 0.6,
    "top_p": 0.95,
    # Suffixes used to build the recursive glob patterns.
    "file_extensions": [".py", ".ts", ".svelte"],
    # Pause applied after an HTTP 429 response or a failed attempt.
    "retry_sleep_seconds": 30,
    # Overwritten by main() with the terms passed by the caller.
    "search_terms": [],
    # Prompt text per pass; formatted with `file_type` (file suffix without
    # the dot) and `content` (the chunk of source code).
    "prompt_templates": {
            # Documentation/PEP 8 pass aimed at Python files.
            "python combined": (
                    """
                    Enhance Python Code with Documentation and Formatting
                    Code Formatting and Preservation

                        Preserve all functional code: Do not remove any working code, including variables, imports, or logic, unless explicitly unused (e.g., variables defined but never referenced).

                        Reduce excessive blank lines: Remove multiple consecutive blank lines, leaving at most one blank line between code blocks for readability, per PEP 8.

                        Refactor for clarity: Improve control flow using Python best practices (e.g., list comprehensions, context managers) only where it enhances readability, without altering behavior.

                        Format: Adhere to PEP 8 style guidelines (e.g., 79-character line limit, proper indentation), consolidating single-line assignments where readability is not compromised.

                        Maintain original structure: Keep the original intent, logic, and blank line spacing unless excessive (more than one consecutive blank line).

                    Documentation Standards

                        Docstrings:
                            Add triple-quoted docstrings ('''Docstring''') to:
                                Modules: At the file's start, describe its purpose and usage.
                                Classes: Explain the class's purpose, key attributes, and usage.
                                Functions: Detail the purpose, parameters, return values, and exceptions raised.
                            Follow Google Python Style Guide format, keeping descriptions concise yet informative.
                            Example:
                            '''
                            Short description of the function/class/module.
                            Args:
                                param1 (type): Description of param1.
                                param2 (type): Description of param2.
                            Returns:
                                type: Description of the return value.
                            Raises:
                                ExceptionType: Conditions under which this is raised.
                            '''

                        High-Level Comments:
                            Add single-line comments (# Purpose: Brief explanation) before major blocks (e.g., classes, functions, or complex logic) to describe their intent.
                            Use clear, concise language focused on "why" rather than "what."

                        Inline Comments:
                            Add inline comments (# Explanation) for non-obvious or complex logic, explaining the reasoning or necessity.
                            Avoid redundant comments on self-explanatory code (e.g., no # Increment x for x += 1).
                            Ensure comments enhance understanding without cluttering.

                    Strict Guidelines

                        Preserve original functionality: Do not alter the code's behavior in any way.
                        Retain variable and function names: No renaming allowed.
                        Do not delete blank lines unless they are excessive (more than one consecutive blank line).
                        Return only the enhanced code: Include only the formatted code with added comments and docstrings, no extra text or summaries.
                        Ensure every module, class, and function has a docstring.
                        Follow PEP 8 (style) and PEP 257 (docstring conventions).

                    Input: {content}
                    """
                ),
        # Language-agnostic optimization plus commenting pass.
        "combined": (
            """
            Optimize and refine the following {file_type} code while preserving its core functionality:

            Code Optimization:
            - Remove unused variables, imports, dead code, and redundant logic.
            - Simplify conditionals by eliminating unnecessary if statements with a single outcome.
            - Refactor repeated logic and enhance control flow using {file_type}-specific best practices.
            - Exclude code related to version updates.
            - Consolidate assignments and statements onto single lines where appropriate, minimizing unnecessary breaks.

            Commenting & Documentation:
            - Add concise, high-level comments above interface, functions, type, class or major code blocks.
            - Include inline comments and property descriptions focusing on 'why' rather than 'what', unless the logic is complex.
            - Use these comment formats:
            - .py files: Triple-quoted docstrings (''') for functions and classes.
            - .ts/.svelte files: JSDoc-style comments (/** */) above functions and interfaces. Keep a single line.
            - HTML/Svelte markup: <!-- Purpose --> comments before major sections.

            Do not alter functional behavior—focus solely on optimization and commenting.

            Input code: {content}

            Return only the optimized code with updated comments, without explanations or additional text.
            """
        ),
        # Comment-only pass: replaces existing comments, leaves code untouched.
        "adjust_comments": (
            "Adjust comments in the {file_type} code while preserving all functionality:\n"
            "- Remove all existing comments\n"
            "- Add concise, meaningful comments as follows:\n"
            "  - For .py files:\n"
            "    - Add a triple-quoted docstring ('''') to every function, describing its purpose in 1-2 sentences\n"
            "      - Example: '''\n      Initializes Pyodide and sets up global variables.\n      '''\n"
            "      - Use multi-line docstrings with proper indentation and line breaks only if >80 characters\n"
            "      - Keep each line under 80 characters\n"
            "    - Add single-line comments (e.g., # Purpose) before significant code blocks\n"
            "  - For .ts/.svelte files:\n"
            "    - Add JSDoc-style comments above functions:\n"
            "      - Use single-line /** Purpose */ only if the description is simple, fits within 80 characters, and requires no tags\n"
            "      - Use multi-line /** */ format if the comment includes tags (@param, @returns, @throws) or exceeds 80 characters\n"
            "      - Example (single-line): /** Represents an Ollama model. */\n"
            "      - Example (multi-line): /**\n       * Fetches audio configuration.\n"
            "       * @param string token - Bearer token for authentication.\n"
            "       * @returns object Audio configuration object.\n"
            "       * @throws Error Error if the request fails.\n"
            "       */\n"
            "    - Use single-line // Purpose for non-function code blocks or simple explanations\n"
            "  - For HTML/Svelte markup:\n"
            "    - Add single-line <!-- Purpose --> comments before major sections\n"
            "- Keep single-line comments (e.g., #, //, <!-- -->) and single-line JSDoc (/** */) under 80 characters\n"
            "- Use multi-line format for docstrings or JSDoc when including tags or if the description exceeds 80 characters\n"
            "- Focus comments on explaining 'why' rather than 'what', unless the logic is complex\n"
            "- Use single-line comments (e.g., //, #) instead of docstrings or JSDoc for trivial or self-explanatory functions\n"
            "- Do not modify any functional code, only adjust comments\n"
            "Input code:\n{content}\n\n"
            "Return only the modified code without explanations or additional text."
        ),
    }
}

# Build one run configuration per credential in use. Selecting "Groq" fans
# out over both Groq entries ("Groq" and "Groq2"); any other selection yields
# a single configuration for that source.
_active_sources = ["Groq", "Groq2"] if source == "Groq" else [source]
CONFIGS = [
    {
        **BASE_CONFIG,
        "base_url": sources[name]["base_url"],
        "api_key": sources[name]["api_key"],
        "model": sources[name]["model"],
    }
    for name in _active_sources
]


def load_processed_files() -> Set[str]:
    """Load the set of file paths recorded as already processed.

    Reads ``PROCESSED_FILES_RECORD`` as UTF-8 text, one path per line, and
    strips surrounding whitespace from each entry. Blank lines are ignored.

    Returns:
        Set[str]: The recorded paths, or an empty set when the record file
        does not exist.
    """
    try:
        # The context manager closes the handle deterministically; catching
        # FileNotFoundError avoids the check-then-open race of os.path.exists.
        with open(PROCESSED_FILES_RECORD, "r", encoding="utf-8") as record:
            return {line.strip() for line in record if line.strip()}
    except FileNotFoundError:
        return set()

def save_processed_files(file_paths: List[str]) -> None:
    """Append the given paths to the processed-files record.

    Args:
        file_paths (List[str]): Paths to record, written one per line.
    """
    with open(PROCESSED_FILES_RECORD, "a", encoding="utf-8") as record:
        for entry in file_paths:
            record.write(f"{entry}\n")


class RateLimiter:
    """Space out request starts so they do not exceed a per-minute rate.

    Callers ``await acquire()`` before each request; the call returns once at
    least ``interval`` seconds have elapsed since the previous grant.

    Attributes:
        interval (float): Minimum number of seconds between two grants.
        last_request_time (float): ``time.monotonic()`` reading of the most
            recent grant.
        lock (asyncio.Lock): Serializes waiters so grants are handed out one
            at a time.
    """

    def __init__(self, requests_per_minute: int):
        """Create a limiter.

        Args:
            requests_per_minute (int): Maximum number of grants per minute.

        Raises:
            ValueError: If ``requests_per_minute`` is not positive.
        """
        if requests_per_minute <= 0:
            raise ValueError("requests_per_minute must be positive")
        self.interval = 60.0 / requests_per_minute
        # Backdate the last grant by one interval so the very first acquire()
        # returns immediately instead of sleeping a full interval.
        # A monotonic clock is used so system clock adjustments cannot
        # stretch or collapse the wait.
        self.last_request_time = time.monotonic() - self.interval
        self.lock = asyncio.Lock()

    async def acquire(self):
        """Wait until the next request may start, then record the grant."""
        async with self.lock:
            elapsed = time.monotonic() - self.last_request_time
            sleep_time = self.interval - elapsed
            if sleep_time > 0:
                # Sleeping while holding the lock keeps later waiters queued
                # behind this one, preserving the spacing between grants.
                await asyncio.sleep(sleep_time)
            self.last_request_time = time.monotonic()


def _strip_code_fence(text: str) -> str:
    """Unwrap a reply that is entirely enclosed in one Markdown code fence.

    The whole opening fence line is dropped, including any language tag, so
    a reply starting with "```python" does not leave part of the tag behind.

    Args:
        text (str): Raw model reply.

    Returns:
        str: The fenced body with surrounding whitespace removed, or the
        stripped input when it is not wrapped in a fence.
    """
    stripped = text.strip()
    if not (stripped.startswith("```") and stripped.endswith("```")):
        return stripped
    first_newline = stripped.find("\n")
    # Without a newline there is no separate opening fence line to drop.
    if first_newline == -1 or len(stripped) < 6:
        return stripped
    return stripped[first_newline + 1:-3].strip()


async def refactor_code(session: aiohttp.ClientSession, file_path: str, prompt: str, rate_limiter: RateLimiter, sem: asyncio.Semaphore, config: Dict) -> str:
    """Send a prompt to the chat-completions endpoint and return the code.

    Makes up to three attempts. An HTTP 429 response or a raised exception
    triggers a pause of ``config["retry_sleep_seconds"]`` before retrying;
    any other non-200 status aborts immediately.

    Args:
        session (aiohttp.ClientSession): Session used for the POST request.
        file_path (str): Path of the file being processed; used in messages.
        prompt (str): Fully formatted prompt sent as the user message.
        rate_limiter (RateLimiter): Awaited before every attempt.
        sem (asyncio.Semaphore): Bounds concurrent in-flight calls.
        config (Dict): Must provide "api_key", "model", "temperature",
            "top_p", "base_url" and "retry_sleep_seconds".

    Returns:
        str: The returned code with any wrapping code fence removed, or an
        empty string on failure or an empty reply.
    """
    max_attempts = 3
    headers = {"Content-Type": "application/json"}
    # Only send credentials when a key is configured (a local server may
    # have none); avoids emitting the literal header value "Bearer None".
    if config.get("api_key"):
        headers["Authorization"] = f"Bearer {config['api_key']}"
    payload = {
        "model": config["model"],
        "messages": [{"role": "user", "content": prompt}],
        "temperature": config["temperature"],
        "top_p": config["top_p"],
    }
    # Tolerate a trailing slash in the configured base URL so the request
    # path never contains "//chat/completions".
    url = f"{config['base_url'].rstrip('/')}/chat/completions"

    async with sem:
        for attempt in range(max_attempts):
            # Acquire per attempt so retries are rate limited as well.
            await rate_limiter.acquire()
            try:
                async with session.post(
                    url,
                    json=payload,
                    headers=headers,
                    timeout=aiohttp.ClientTimeout(total=180)
                ) as response:
                    if response.status == 429:
                        await asyncio.sleep(config["retry_sleep_seconds"])
                        continue
                    if response.status != 200:
                        print(f"API error {response.status} for {file_path}: {await response.text()}")
                        return ""

                    result = await response.json()
                    # Guard each level: an empty "choices" list or a null
                    # "content" is treated as an empty reply.
                    choices = result.get("choices") or [{}]
                    message = choices[0].get("message") or {}
                    code = (message.get("content") or "").strip()
                    if not code:
                        print(f"Empty response for {file_path}")
                        return ""

                    return _strip_code_fence(code)

            except Exception as e:
                # Deliberate best effort: report and retry rather than abort
                # the whole batch on one failed request.
                print(f"Attempt {attempt + 1} failed for {file_path}: {e}")
                if attempt < max_attempts - 1:
                    await asyncio.sleep(config["retry_sleep_seconds"])
    print(f"Giving up on {file_path} after {max_attempts} attempts")
    return ""


def split_into_chunks(content: str, max_tokens: int) -> List[str]:
    """Split text into line-aligned chunks within an estimated token budget.

    The budget is converted to characters at roughly 4 characters per token.
    Lines are never split, so a single line longer than the budget becomes a
    chunk of its own. Line terminators are dropped by ``splitlines`` and
    lines are re-joined with "\\n", so a trailing newline in ``content`` is
    not preserved.

    Args:
        content (str): Text to split.
        max_tokens (int): Approximate token budget per chunk.

    Returns:
        List[str]: Chunks in original order; empty list for empty input.
    """
    char_budget = max_tokens * 4  # Rough estimate: 4 characters per token
    chunks = []
    current_chunk = []
    current_length = 0  # Characters in current_chunk, one newline per line

    for line in content.splitlines():
        if current_chunk and current_length + len(line) > char_budget:
            chunks.append("\n".join(current_chunk))
            current_chunk = []
            current_length = 0
        current_chunk.append(line)
        # Always count the joining newline, including for the first line of
        # a fresh chunk, so a chunk cannot overshoot the budget by one
        # character after a flush.
        current_length += len(line) + 1
    if current_chunk:
        chunks.append("\n".join(current_chunk))
    return chunks


async def process_single_file(session: aiohttp.ClientSession, file_path: str, rate_limiter: RateLimiter, sem: asyncio.Semaphore, config: Dict) -> bool:
    """Rewrite one file in place using the selected prompt template.

    The file is split into chunks, each chunk is sent through
    ``refactor_code``, and the results are joined with newlines and written
    back. Chunks that fail (empty result) or are skipped keep their original
    text.

    Args:
        session (aiohttp.ClientSession): Session passed to ``refactor_code``.
        file_path (str): Path of the file to read and overwrite.
        rate_limiter (RateLimiter): Passed to ``refactor_code``.
        sem (asyncio.Semaphore): Passed to ``refactor_code``.
        config (Dict): Must provide "max_context_tokens", "skip_long_files",
            "prompt_templates" and "selected_pass", plus the keys required
            by ``refactor_code``.

    Returns:
        bool: True if the file was rewritten with new content; False if it
        could not be read or written, the selected pass is unknown, or
        nothing changed.
    """
    file_ext = Path(file_path).suffix[1:]
    try:
        async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
            content = await f.read()
    except Exception as e:
        print(f"Failed to read {file_path}: {e}")
        return False

    # Resolve the template once; an unknown pass can never produce a change,
    # so stop before chunking.
    template = config["prompt_templates"].get(config["selected_pass"], "")
    if not template:
        print(f"Invalid pass for {file_path}")
        return False

    # Split into chunks if content is too long
    chunks = split_into_chunks(content, config["max_context_tokens"])
    modified_chunks = []
    any_chunk_modified = False

    for i, chunk in enumerate(chunks):
        if config["skip_long_files"] and (len(chunk) // 4) > config["max_context_tokens"]:
            print(f"Skipping chunk {i+1} of {file_path} (too long)")
            modified_chunks.append(chunk)  # Keep original chunk if too long
            continue

        prompt = template.format(file_type=file_ext, content=chunk)
        modified_content = await refactor_code(session, file_path, prompt, rate_limiter, sem, config)
        if not modified_content:
            modified_chunks.append(chunk)  # Keep original if refactoring fails
        else:
            modified_chunks.append(modified_content)
            any_chunk_modified = True

    # Chunking drops the trailing newline; without this guard a file whose
    # every chunk failed would still be rewritten and reported as processed.
    if not any_chunk_modified:
        return False

    modified_content = "\n".join(modified_chunks)
    if content.endswith("\n") and not modified_content.endswith("\n"):
        modified_content += "\n"  # Restore the original end-of-file newline
    if modified_content == content:
        return False

    try:
        async with aiofiles.open(file_path, "w", encoding="utf-8") as f:
            await f.write(modified_content)
        print(f"Processed: {file_path}")
        return True
    except Exception as e:
        print(f"Failed to write {file_path}: {e}")
        return False

def should_skip_file(file_path: Path, skip_dirs: List[Path]) -> bool:
    """Tell whether a file lies inside one of the skipped directories.

    Matching is done on path components rather than on a string prefix, so
    a skip directory ``/a/b`` does not match the sibling ``/a/bc``.

    Args:
        file_path (Path): Path to test.
        skip_dirs (List[Path]): Directories whose contents are excluded.

    Returns:
        bool: True if ``file_path`` equals or is located under any entry of
        ``skip_dirs``.
    """
    candidate = Path(file_path)
    ancestors = set(candidate.parents)
    ancestors.add(candidate)
    return any(Path(skip_dir) in ancestors for skip_dir in skip_dirs)

def scan_directories(config: Dict) -> List[str]:
    """Collect candidate files from the configured source directories.

    Walks each source directory recursively for every configured extension
    and returns files that are neither already recorded as processed nor
    located under a skipped directory. Search-term filtering is NOT done
    here.

    Args:
        config (Dict): Must provide "source_directories",
            "skip_directories" and "file_extensions".

    Returns:
        List[str]: Resolved file paths in discovery order, no duplicates.
    """
    processed_files = load_processed_files()
    file_paths: List[str] = []
    seen: Set[str] = set()
    skip_dirs = [Path(d).resolve() for d in config["skip_directories"]]

    for directory in config["source_directories"]:
        dir_path = Path(directory).resolve()
        if not dir_path.is_dir():
            print(f"Skipping missing source directory: {directory}")
            continue
        for ext in config["file_extensions"]:
            for file_path in dir_path.glob(f"**/*{ext}"):
                # The glob matches on name only, so a directory whose name
                # ends with the extension would otherwise be returned.
                if not file_path.is_file():
                    continue
                resolved = file_path.resolve()
                file_path_str = str(resolved)
                # Overlapping source directories or extensions can yield the
                # same file more than once.
                if file_path_str in seen or file_path_str in processed_files:
                    continue
                if should_skip_file(resolved, skip_dirs):
                    continue
                seen.add(file_path_str)
                file_paths.append(file_path_str)

    print(f"Found {len(file_paths)} potential files")
    return file_paths

def backup_files(file_paths: List[str]) -> str:
    """Copy files into a fresh timestamped backup directory.

    Each file is stored under its full path relative to the filesystem root
    (prefixed by the drive name where one exists), so files that share a
    basename such as ``__init__.py`` do not overwrite each other.

    Args:
        file_paths (List[str]): Files to back up.

    Returns:
        str: Name of the created backup directory (relative to the current
        working directory), or an empty string when ``file_paths`` is empty.
    """
    if not file_paths:
        return ""
    backup_dir = f"backup_{int(time.time())}"
    os.makedirs(backup_dir, exist_ok=True)
    for fp in file_paths:
        src = Path(fp).resolve()
        # Turn the drive (e.g. "C:") into a plain folder name; it is empty
        # on POSIX, where joining "" leaves the path unchanged.
        drive = src.drive.replace(":", "").strip("\\/")
        dest = Path(backup_dir) / drive / src.relative_to(src.anchor)
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(src, dest)
    return backup_dir

async def process_file_batch(session: aiohttp.ClientSession, file_paths: List[str], config: Dict, rate_limiter: RateLimiter, sem: asyncio.Semaphore) -> List[bool]:
    """Run ``process_single_file`` concurrently for every path in a batch.

    Args:
        session (aiohttp.ClientSession): Shared HTTP session.
        file_paths (List[str]): Files to process.
        config (Dict): Configuration forwarded to each file task.
        rate_limiter (RateLimiter): Limiter shared by the batch.
        sem (asyncio.Semaphore): Concurrency bound shared by the batch.

    Returns:
        List[bool]: One result per input path, as gathered.
    """
    pending = []
    for path in file_paths:
        pending.append(process_single_file(session, path, rate_limiter, sem, config))
    outcomes = await asyncio.gather(*pending)
    return outcomes


async def main(search_terms: Optional[List[str]] = None, max_files_to_process: Optional[int] = None):
    """Scan, filter, back up, rewrite and record source files.

    Steps: collect candidate files, keep those containing any search term
    (all of them when no terms are given), apply the optional file limit,
    back the selection up, process it through the configured API(s), record
    the files that were rewritten, delete the backup, and finally remove
    empty directories under the source directories.

    Args:
        search_terms (Optional[List[str]]): Case-insensitive literal terms;
            a file is selected if it contains at least one. None or empty
            selects every candidate file.
        max_files_to_process (Optional[int]): Upper bound on the number of
            files processed, applied after filtering. None means no limit.
    """
    # Empty strings would match every file, so they are dropped.
    terms = [term for term in (search_terms or []) if term]
    for config in CONFIGS:
        config["search_terms"] = terms

    # Append mode creates the record if it is missing without truncating it.
    with open(PROCESSED_FILES_RECORD, "a", encoding="utf-8"):
        pass

    # Scan all files first
    all_file_paths = scan_directories(CONFIGS[0])

    # Filter files containing search terms
    if terms:
        # Compile once instead of rebuilding the pattern for every file.
        pattern = re.compile("|".join(re.escape(term) for term in terms), re.IGNORECASE)
        matching_files = []
        for file_path in all_file_paths:
            try:
                async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
                    content = await f.read()
            except Exception as e:
                print(f"Error reading {file_path}: {e}")
                continue
            if pattern.search(content):
                matching_files.append(file_path)
    else:
        # No filter requested: skip reading every file just to match nothing.
        matching_files = list(all_file_paths)

    print(f"Found {len(matching_files)} files containing search terms: {search_terms}")

    # Apply max_files_to_process limit after filtering
    file_paths = matching_files
    if max_files_to_process is not None:
        file_paths = file_paths[:max_files_to_process]

    if not file_paths:
        print("No matching files found.")
        return

    print(f"Processing {len(file_paths)} files")
    backup_dir = backup_files(file_paths)

    # One contiguous slice per configuration; contiguous slices keep the
    # concatenated results aligned with file_paths.
    batch_size = -(-len(file_paths) // len(CONFIGS))  # ceiling division
    file_batches = [file_paths[i * batch_size:(i + 1) * batch_size] for i in range(len(CONFIGS))]

    rate_limiters = [RateLimiter(cfg["requests_per_minute"]) for cfg in CONFIGS]
    semaphores = [asyncio.Semaphore(cfg["concurrent_requests"]) for cfg in CONFIGS]

    async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(limit=sum(c["concurrent_requests"] for c in CONFIGS))) as session:
        # Each configuration has its own limiter and semaphore, so batches
        # run concurrently; gather returns results in the order given.
        batch_results = await asyncio.gather(*[
            process_file_batch(session, files, cfg, rate_limiter, sem)
            for cfg, files, rate_limiter, sem in zip(CONFIGS, file_batches, rate_limiters, semaphores)
        ])
        all_results = [result for batch in batch_results for result in batch]

        processed_files = [fp for fp, success in zip(file_paths, all_results) if success]
        save_processed_files(processed_files)
        print(f"Processed {len(processed_files)} files")

    # NOTE(review): the backup is removed even when some files failed;
    # confirm that this is the intended retention policy.
    if backup_dir and os.path.exists(backup_dir):
        shutil.rmtree(backup_dir)

    # Clean up empty directories in source directories
    for directory in CONFIGS[0]["source_directories"]:
        dir_path = Path(directory).resolve()
        if not dir_path.is_dir():
            continue
        # Bottom-up so a parent emptied by removing its children is seen
        # as empty when it is visited.
        for root, dirs, _files in os.walk(dir_path, topdown=False):
            for d in dirs:
                dir_to_check = os.path.join(root, d)
                try:
                    if not os.listdir(dir_to_check):  # Check if directory is empty
                        os.rmdir(dir_to_check)
                        print(f"Removed empty directory: {dir_to_check}")
                except OSError as e:
                    print(f"Failed to remove {dir_to_check}: {e}")

In [2]:
# Run the pipeline without a search-term filter, processing at most 112 files.
# NOTE(review): a bare top-level `await` is not valid in a plain Python
# script; this cell presumably relies on the notebook kernel's support for
# awaiting at cell level. Use asyncio.run(main(...)) outside a notebook.
await main(max_files_to_process=112)

Found 1 potential files
Found 1 files containing search terms: None
Processing 1 files
Processed: C:\Users\harold.noble\Desktop\RIC\app\backend\webui_backend\routers\ollama.py
Processed 1 files
