In [3]:
import asyncio
import json
import os
import re
from pathlib import Path
from typing import Dict, List, Optional

import aiofiles
import aiohttp

# Configuration
# Everything a reader might tune lives in this one dictionary; the functions
# below receive it as `config` and read the keys documented here.
CONFIG = {
    # Root folders that are searched recursively for source files.
    "source_directories": [r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\apis"],
    # OpenAI-compatible endpoint; "chat/completions" is appended to it.
    "base_url": "https://api.groq.com/openai/v1/",
    "model": "qwen-2.5-coder-32b",
    # SECURITY: credentials must never be stored in the notebook. The key is
    # read from the GROQ_API_KEY environment variable (empty string if unset,
    # in which case the API will reject the requests). Any key that was ever
    # committed in this cell should be treated as leaked and rotated.
    "api_key": os.environ.get("GROQ_API_KEY", ""),
    # Only files whose names end with one of these suffixes are analysed.
    "file_extensions": [".py", ".ts", ".svelte"],
    # Files whose estimated size (characters // 4) exceeds this are chunked.
    "max_context_tokens": 1000,
    # Throttling: request spacing and number of in-flight requests.
    "requests_per_minute": 5,
    "concurrent_requests": 3,
    # Sampling parameters forwarded in the request payload.
    "temperature": 0.6,
    "top_p": 0.95,
    # Pause between retry attempts, in seconds.
    "retry_sleep_seconds": 30,
    # Filled with str.format(file_type=..., content=...).
    "prompt_template": (
        "Analyze this {file_type} code and suggest better function names:\n"
        "1. Identify each function and its purpose.\n"
        "2. Propose new, descriptive names that reflect their functionality.\n"
        "3. Return a JSON list of objects with 'original_name' and 'new_name' fields.\n"
        "4. Do not modify the code itself, only return the name mappings.\n\n"
        "Input code:\n{content}\n\n"
        "Return only the JSON list without additional explanation."
    )
}


class RateLimiter:
    """Rate limiter to control the number of requests per minute."""

    def __init__(self, requests_per_minute: int):
        self.interval = 60.0 / requests_per_minute
        self.last_request_time = 0
        self.lock = asyncio.Lock()

    async def acquire(self):
        async with self.lock:
            current_time = asyncio.get_event_loop().time()
            elapsed = current_time - self.last_request_time
            if elapsed < self.interval:
                await asyncio.sleep(self.interval - elapsed)
            self.last_request_time = asyncio.get_event_loop().time()


def split_into_chunks(content: str, max_tokens: int, file_type: str) -> List[str]:
    """Split ``content`` into pieces of roughly ``max_tokens`` tokens each.

    Tokens are estimated as ``characters // 4`` (newlines included). Content
    that already fits is returned unchanged as a single-element list.
    Otherwise lines are accumulated until the next one would exceed the
    budget; the accumulated text is then cut at the start of the last line
    that looks like a definition boundary for ``file_type`` (so the upcoming
    definition stays together), or flushed whole if no such line exists.
    Cuts only ever happen between lines, never in the middle of one.

    Args:
        content: Full text of the source file.
        max_tokens: Token budget per chunk.
        file_type: File extension without the dot ("py", "ts", "svelte");
            any other value falls back to blank lines as boundaries.

    Returns:
        Non-empty, whitespace-stripped chunks in their original order. A
        single line longer than the budget still becomes a (too large) chunk
        of its own, because lines are never broken apart.
    """
    if len(content) // 4 <= max_tokens:
        return [content]

    # Patterns are matched against individual lines.
    boundary_markers = {
        "py": [r"def\s+\w+\s*\(", r"class\s+\w+"],
        "ts": [
            r"function\s+\w+\s*\(",
            r"class\s+\w+",
            # Top-level arrow functions: `export const f = async (...) =>`.
            r"^(?:export\s+)?(?:const|let|var)\s+\w+\s*=\s*(?:async\s*)?\(",
        ],
        # `<script\b[^>]*>` also matches tags with attributes, e.g. lang="ts".
        "svelte": [r"function\s+\w+\s*\(", r"<script\b[^>]*>"],
    }.get(file_type, [r"^\s*$"])  # unknown types: split at blank lines
    boundary_patterns = [re.compile(marker) for marker in boundary_markers]

    def is_boundary(line: str) -> bool:
        return any(pattern.search(line) for pattern in boundary_patterns)

    chunks = []
    current_lines = []
    current_chars = 0

    for line in content.splitlines():
        line_chars = len(line) + 1  # +1 for the newline that joins the lines
        # Characters are summed before dividing, so many short lines cannot
        # each round down to zero tokens and slip past the budget.
        while current_lines and (current_chars + line_chars) // 4 > max_tokens:
            # Index 0 is excluded: cutting there would emit nothing and make
            # no progress.
            split_index = next(
                (i for i in range(len(current_lines) - 1, 0, -1) if is_boundary(current_lines[i])),
                0,
            )
            if split_index:
                chunks.append("\n".join(current_lines[:split_index]).strip())
                current_lines = current_lines[split_index:]
                current_chars = sum(len(kept) + 1 for kept in current_lines)
            else:
                chunks.append("\n".join(current_lines).strip())
                current_lines = []
                current_chars = 0
        current_lines.append(line)
        current_chars += line_chars

    if current_lines:
        chunks.append("\n".join(current_lines).strip())

    return [chunk for chunk in chunks if chunk]


def _parse_name_mappings(raw_text: str, file_path: str) -> List[Dict[str, str]]:
    """Convert the model's reply into a validated list of name mappings, or [] if unusable."""
    # Remove Markdown code fences (```json, ```javascript, bare ```) around the payload.
    cleaned = re.sub(r'^```[\w-]*\s*|\s*```$', '', raw_text.strip(), flags=re.MULTILINE).strip()
    try:
        mappings = json.loads(cleaned)
    except json.JSONDecodeError as e:
        print(f"Failed to parse JSON response for {file_path}: {cleaned}\nError: {e}")
        return []

    well_formed = isinstance(mappings, list) and all(
        isinstance(m, dict) and "original_name" in m and "new_name" in m for m in mappings
    )
    if not well_formed:
        print(f"Invalid response format for {file_path}: {cleaned}")
        return []
    return mappings


async def get_function_name_suggestions(session: aiohttp.ClientSession, file_path: str, content: str, file_type: str, rate_limiter: RateLimiter, sem: asyncio.Semaphore, config: Dict) -> List[Dict[str, str]]:
    """Ask the LLM for better function names for one piece of code.

    The prompt is built from ``config["prompt_template"]`` and posted to
    ``{base_url}chat/completions``. Up to three attempts are made. Every
    failed attempt that will be retried (exception, retryable HTTP status,
    or a reply without ``choices``) is followed by a pause of
    ``config["retry_sleep_seconds"]``. HTTP 4xx statuses other than 408 and
    429 are treated as permanent and end the call immediately, since
    repeating the same request cannot succeed.

    Args:
        session: Open aiohttp session used for the POST request.
        file_path: Label used in log messages (may include chunk info).
        content: Source code to analyse.
        file_type: File extension without the dot, inserted into the prompt.
        rate_limiter: Awaited before every attempt to space out requests.
        sem: Bounds the number of requests in flight at once.
        config: Needs "prompt_template", "api_key", "model", "temperature",
            "top_p", "base_url" and "retry_sleep_seconds".

    Returns:
        A list of dicts each containing "original_name" and "new_name", or an
        empty list when no valid answer could be obtained.
    """
    prompt = config["prompt_template"].format(file_type=file_type, content=content)
    headers = {
        "Authorization": f"Bearer {config['api_key']}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": config["model"],
        "messages": [{"role": "user", "content": prompt}],
        "temperature": config["temperature"],
        "top_p": config["top_p"],
    }

    max_attempts = 3
    for attempt in range(max_attempts):
        try:
            await rate_limiter.acquire()
            result = None
            async with sem:
                print(f"Sending prompt to LLM for {file_path}")
                async with session.post(
                    f"{config['base_url']}chat/completions",
                    json=payload,
                    headers=headers,
                    timeout=aiohttp.ClientTimeout(total=180)
                ) as response:
                    status = response.status
                    if status == 200:
                        result = await response.json()
                    else:
                        error_text = await response.text()
                        print(f"API error {status} for {file_path}: {error_text}")

            # The semaphore is released here, so parsing and the back-off
            # sleep below do not block other requests.
            if status != 200:
                retryable = status in (408, 429) or status >= 500
                if not retryable:
                    # e.g. 401 bad key, 404 unknown model: retrying is futile.
                    return []
            elif "choices" not in result or not result["choices"]:
                print(f"Missing 'choices' in API response for {file_path}: {result}")
            else:
                reply = result["choices"][0]["message"]["content"]
                # A malformed reply is final: the request itself succeeded.
                return _parse_name_mappings(reply, file_path)
        except Exception as e:
            print(f"Attempt {attempt + 1} failed for {file_path}: {e}")

        if attempt < max_attempts - 1:
            await asyncio.sleep(config["retry_sleep_seconds"])

    print(f"All attempts failed for {file_path}")
    return []


async def process_single_file(session: aiohttp.ClientSession, file_path: str, rate_limiter: RateLimiter, sem: asyncio.Semaphore, config: Dict) -> List[Dict[str, str]]:
    """Read one source file, chunk it if needed, and gather name suggestions.

    The file is read as UTF-8. When its estimated token count (characters
    // 4) is above ``config["max_context_tokens"]`` it is split with
    ``split_into_chunks``; each chunk is then sent to the LLM one after the
    other. Returns the concatenated mappings of all chunks, or an empty list
    if the file cannot be read.
    """
    extension = Path(file_path).suffix[1:]

    try:
        async with aiofiles.open(file_path, "r", encoding="utf-8") as handle:
            source_text = await handle.read()
    except Exception as e:
        print(f"Failed to read {file_path}: {e}")
        return []

    estimated_tokens = len(source_text) // 4
    if estimated_tokens <= config["max_context_tokens"]:
        pieces = [source_text]
    else:
        print(f"File {file_path} ({estimated_tokens} tokens) exceeds max_context_tokens ({config['max_context_tokens']}). Splitting into chunks.")
        pieces = split_into_chunks(source_text, config["max_context_tokens"], extension)

    total = len(pieces)
    collected = []
    for number, piece in enumerate(pieces, start=1):
        label = f"{file_path} (chunk {number}/{total})"
        suggestions = await get_function_name_suggestions(session, label, piece, extension, rate_limiter, sem, config)
        if not suggestions:
            print(f"Failed to get suggestions for chunk {number} of {file_path}")
            continue
        collected.extend(suggestions)

    return collected


async def scan_directories(config: Dict) -> List[str]:
    """Collect the source files to analyse.

    Each entry of ``config["source_directories"]`` is searched recursively
    for regular files whose name ends with one of
    ``config["file_extensions"]``. Missing directories are reported and
    skipped. Directories that merely have a matching name (e.g. a folder
    called ``types.ts``) are ignored.

    Returns:
        Absolute, resolved paths without duplicates. Directories keep their
        configured order and paths are sorted within each directory, so
        taking the first N results is reproducible between runs.
    """
    file_paths = []
    already_listed = set()
    for directory in config["source_directories"]:
        dir_path = Path(directory).resolve()
        if not dir_path.is_dir():
            print(f"Directory not found: {directory}")
            continue
        matches = {
            str(candidate.resolve())
            for ext in config["file_extensions"]
            for candidate in dir_path.glob(f"**/*{ext}")
            if candidate.is_file()
        }
        # Overlapping directories or extensions must not list a file twice.
        file_paths.extend(sorted(matches - already_listed))
        already_listed |= matches
    print(f"Found {len(file_paths)} files to process")
    return file_paths


async def main(max_files_to_process: Optional[int] = None):
    """Run the whole pipeline and print the suggested renames.

    Discovers files via ``scan_directories(CONFIG)``, optionally keeps only
    the first ``max_files_to_process`` of them, analyses all files
    concurrently through one shared HTTP session, rate limiter and
    semaphore, then prints an ``original -> new`` line for every mapping.
    """
    candidates = await scan_directories(CONFIG)
    if max_files_to_process is not None:
        candidates = candidates[:max_files_to_process]

    if not candidates:
        print("No files to process. Exiting.")
        return

    limiter = RateLimiter(CONFIG["requests_per_minute"])
    gate = asyncio.Semaphore(CONFIG["concurrent_requests"])

    async with aiohttp.ClientSession() as session:
        per_file_results = await asyncio.gather(
            *[process_single_file(session, path, limiter, gate, CONFIG) for path in candidates]
        )

    # Only files that produced at least one mapping are reported.
    all_mappings = {
        path: found for path, found in zip(candidates, per_file_results) if found
    }

    print(f"Processing complete. Analyzed {len(all_mappings)} files.")
    if not all_mappings:
        return

    print("\nFunction name suggestions:")
    for path, found in all_mappings.items():
        print(f"\n{path}:")
        for entry in found:
            print(f"  {entry['original_name']} -> {entry['new_name']}")


In [4]:
# Trial run: `main` is awaited directly at the top level of the notebook cell,
# and only the first 5 discovered files are analysed.
await main(max_files_to_process=5)

Found 22 files to process
File C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\apis\auths\index.ts (1670 tokens) exceeds max_context_tokens (1000). Splitting into chunks.
Sending prompt to LLM for C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\apis\auths\index.ts (chunk 1/2)
File C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\apis\configs\index.ts (1495 tokens) exceeds max_context_tokens (1000). Splitting into chunks.
File C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\apis\index.ts (5060 tokens) exceeds max_context_tokens (1000). Splitting into chunks.
File C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\apis\chats\index.ts (4634 tokens) exceeds max_context_tokens (1000). Splitting into chunks.
Sending prompt to LLM for C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\apis\audio\index.ts (chunk 1/1)
Sending prompt to LLM for C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\apis\configs\index.ts (chunk 1/2)
Sending prompt to LLM for C:\

CancelledError: 