In [3]:
#!/usr/bin/env python3
"""
Script to refactor Python, Svelte, and TypeScript files by processing entire files with LLM streaming.
Tracks processed files and saves refactored code back to the original files.
Allows skipping specified subfolders. Focuses on docstrings, formatting, optimization, error handling, and cleanup.
"""

import os
import re
import time
from typing import List, Set

import ast
import chardet
from groq import Groq
from tqdm import tqdm

# Constants
GROQ_API_KEY = "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"
DEFAULT_DIRECTORY = r"C:/Users/HJN/Desktop/open-webui - Copy/app/src"  # Fixed escape sequence
SKIP_FOLDERS = {".github", "code_helper", "ollama-0"}
PROCESSED_FILES_TRACKER = "processed_files.txt"
FILES_PER_RUN = 10
LLM_MODEL = "llama-3.3-70b-versatile"
LLM_TEMPERATURE = 0.5
LLM_TOP_P = 0.9
TIMEOUT_SECONDS = 180  # 3 minutes
MAX_RETRIES = 3

# System Prompt for LLM
SYSTEM_PROMPT = """
You are an expert developer specializing in code refactoring, created to assist with optimizing code scripts. Your role is to refactor entire scripts while preserving their original functionality.

IMPORTANT: Your response must contain ONLY the refactored code with no explanations, comments about changes, or formatting markers. Do not include markdown code blocks, explanations, or anything else that is not part of the actual code.
"""

# User Prompt Template
REFACTOR_PROMPT = """
Refactor this code (Python, Svelte, or TypeScript) with the following improvements:

1. Remove all code related to these components and their derivatives:
   - Gemini, ComfyUI, Tika, document intelligence, checkForVersionUpdates
   - webhookUrl, LDAP, oauth
   - enable_community_sharing, ShareChatModal.svelte
   - ENABLE_CHANNELS, channels.py, channel_id
   - permissions.chat.controls, Controls\\Controls.svelte, Controls\\Valves.svelte
   - playground, nonLocalVoices, haptic, mobile
   - All i18n related code and references

2. Optimize performance while maintaining readability:
   - Simplify logic flows
   - Eliminate redundancies and inefficiencies
   - Streamline operations

3. Clean up the codebase:
   - Remove unused imports and dead code

4. Improve naming:
   - Enhance function, class, and variable names defined within this script to better reflect their purpose
   - Only rename elements defined in this script, not imported ones

5. Enhance error handling:
   - Use specific exception types (ValueError, IOError, etc.)
   - Include actionable error messages

6. Improve documentation:
   - Replace comments with appropriate docstrings for modules, functions, and classes
   - Retain inline comments for notes not suitable for docstrings

IMPORTANT: Return ONLY the refactored code without explanations, markdown formatting, or change summaries.

Code:
{code_content}
"""


def fetch_llm_response(
    prompt: str,
    system_prompt: str = SYSTEM_PROMPT,
    model: str = LLM_MODEL,
    temperature: float = LLM_TEMPERATURE,
    top_p: float = LLM_TOP_P,
    retries: int = MAX_RETRIES,
) -> str:
    """Fetch response from the Groq API and return the refactored script."""
    if not prompt or not isinstance(prompt, str):
        raise ValueError("Prompt must be a non-empty string")

    if not GROQ_API_KEY or GROQ_API_KEY == "type_your_api_key_here":
        raise ValueError("Please set a valid GROQ_API_KEY at the top of the script")

    client = Groq(api_key=GROQ_API_KEY)

    for attempt in range(retries):
        try:
            print("Sending request to Groq API: ", end="", flush=True)
            chat_completion = client.chat.completions.create(
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt},
                ],
                model=model,
                temperature=temperature,
                top_p=top_p,
                stream=False,
            )
            response = chat_completion.choices[0].message.content
            print("Response received.", flush=True)
            return extract_code(response)

        except Exception as e:
            if attempt == retries - 1:
                raise Exception(f"Failed after {retries} attempts: {e}")
            print(f"Attempt {attempt + 1} failed: {e}. Retrying in 2 seconds...")
            time.sleep(2)


def extract_code(response: str) -> str:
    """Extract only the code from the LLM response, removing any non-code content."""
    # Try to find code blocks with markdown formatting
    code_block_pattern = r"```(?:python|typescript|svelte|js|jsx|ts|tsx)?(.*?)```"
    code_blocks = re.findall(code_block_pattern, response, re.DOTALL)
    if code_blocks:
        return "\n\n".join(block.strip() for block in code_blocks)

    # If no code blocks found, try to filter out explanations
    explanation_markers = [
        "Here's the refactored", "The refactored", "I've refactored",
        "Changes made:", "### Changes", "## Changes", "# Changes",
        "Explanation:", "### Explanation", "## Explanation", "# Explanation",
        "Summary of changes:", "In summary,", "To summarize,"
    ]
    lines = response.split('\n')
    code_lines = []
    in_code_section = True

    for line in lines:
        if any(marker.lower() in line.lower() for marker in explanation_markers):
            in_code_section = False
            continue
        if in_code_section:
            code_lines.append(line)

    return "\n".join(code_lines) if code_lines else response


def load_processed_files(tracker_file: str) -> Set[str]:
    """Load the set of already processed file paths from a tracker file."""
    processed = set()
    if os.path.exists(tracker_file):
        with open(tracker_file, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if line and not line.startswith("#"):
                    processed.add(line)
    return processed


def save_processed_files(tracker_file: str, processed: Set[str]) -> None:
    """Save the set of processed file paths to a tracker file."""
    with open(tracker_file, "w", encoding="utf-8") as f:
        f.write("# processed_files.txt\n")
        f.write("# Tracks files already refactored\n\n")
        for file_path in sorted(processed):
            f.write(f"{file_path}\n")


def is_valid_python(code: str) -> bool:
    """Check if the code is syntactically valid Python."""
    try:
        ast.parse(code)
        return True
    except SyntaxError:
        return False


def is_valid_code(file_path: str, code: str) -> bool:
    """Validate code based on file extension."""
    if file_path.endswith(".py"):
        return is_valid_python(code)
    # For other file types, we can't easily validate syntax
    # You might want to add validation for TypeScript or Svelte if needed
    return True


def process_single_file(file_path: str) -> bool:
    """Process an entire file by refactoring it with the LLM."""
    try:
        # Try with UTF-8 first
        with open(file_path, "r", encoding="utf-8") as f:
            original_content = f.read()
    except UnicodeDecodeError:
        try:
            # If UTF-8 fails, detect encoding
            with open(file_path, "rb") as f:
                raw = f.read()
                detected = chardet.detect(raw)
                encoding = detected["encoding"] or "latin1"
            with open(file_path, "r", encoding=encoding) as f:
                original_content = f.read()
            print(f"Warning: {file_path} decoded with fallback encoding '{encoding}'")
        except Exception as e:
            print(f"Error decoding {file_path}: {e}")
            return False
    except OSError as e:
        print(f"Error reading {file_path}: {e}")
        return False

    print(f"Processing entire file: {file_path}")
    prompt = REFACTOR_PROMPT.format(code_content=original_content)

    try:
        refactored_content = fetch_llm_response(prompt)

        # Show a preview of the refactored content
        tqdm.write(f"Refactored content preview (first few lines):")
        preview_lines = refactored_content.split('\n')[:5]
        for line in preview_lines:
            tqdm.write(f"  {line}")
        if len(preview_lines) < len(refactored_content.split('\n')):
            tqdm.write("  ...")

        # Validate the refactored code
        if file_path.endswith(".py") and not is_valid_python(refactored_content):
            print(f"Error: Refactored code for {file_path} is invalid Python. Keeping original.")
            print(f"First 200 characters of invalid code: {refactored_content[:200]}")
            return False

        if refactored_content == original_content:
            print(f"No changes were made to {file_path}")
            return False

        # Ensure file ends with newline
        if not refactored_content.endswith("\n"):
            refactored_content += "\n"

        # Write the refactored content back to the file
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(refactored_content)
        print(f"Refactored and overwrote {file_path}")
        return True

    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return False


def get_files_to_process(directory: str) -> List[str]:
    """Recursively find all Python, Svelte, and TypeScript files in the directory, excluding SKIP_FOLDERS."""
    files_to_process = []
    for root, dirs, files in os.walk(directory):
        # Filter out directories to skip
        dirs[:] = [d for d in dirs if d not in SKIP_FOLDERS]
        for file in files:
            if file.endswith((".py", ".svelte", ".ts", ".tsx")):
                full_path = os.path.join(root, file)
                files_to_process.append(full_path)
    return files_to_process


def clear_terminal() -> None:
    """Clear the terminal screen."""
    os.system("cls" if os.name == "nt" else "clear")


def main() -> None:
    """Process up to FILES_PER_RUN scripts with a progress bar."""
    # Validate directory
    if not os.path.exists(DEFAULT_DIRECTORY):
        print(f"Error: Directory {DEFAULT_DIRECTORY} does not exist.")
        return

    # Load previously processed files
    processed_files = load_processed_files(PROCESSED_FILES_TRACKER)

    # Get all files to process
    files_to_process = get_files_to_process(DEFAULT_DIRECTORY)
    if not files_to_process:
        print(f"No Python, Svelte, or TypeScript files found in {DEFAULT_DIRECTORY} (excluding {SKIP_FOLDERS}).")
        return

    # Filter out already processed files
    remaining_files = [f for f in files_to_process if f not in processed_files]
    if not remaining_files:
        print("All files have already been processed.")
        return

    # Select files for this run
    files_this_run = remaining_files[:FILES_PER_RUN]
    print(f"Found {len(remaining_files)} unprocessed files. Processing {len(files_this_run)} this run.")
    print(f"Skipping folders: {SKIP_FOLDERS}")

    # Process files with progress bar
    with tqdm(total=len(files_this_run), desc="Refactoring Files", unit="file") as pbar:
        for i, file_path in enumerate(files_this_run, 1):
            tqdm.write(f"Processing file {i}/{len(files_this_run)}: {file_path}")
            if process_single_file(file_path):
                processed_files.add(file_path)
                save_processed_files(PROCESSED_FILES_TRACKER, processed_files)
            pbar.update(1)

    # Summary
    remaining = len(remaining_files) - len(files_this_run)
    print(f"Processed {len(files_this_run)} files this run. {remaining} files remain unprocessed.")
    print("Refactoring batch complete! Check the results and run again for the next batch.")


if __name__ == "__main__":
    main()

Found 369 unprocessed files. Processing 10 this run.
Skipping folders: {'.github', 'code_helper', 'ollama-0'}


Refactoring Files:   0%|          | 0/10 [00:00<?, ?file/s]

Processing file 1/10: C:/Users/HJN/Desktop/open-webui - Copy/app/src\app.d.ts
Processing entire file: C:/Users/HJN/Desktop/open-webui - Copy/app/src\app.d.ts
Sending request to Groq API: Response received.


Refactoring Files:  10%|█         | 1/10 [00:01<00:12,  1.44s/file]

Refactored content preview (first few lines):
  declare global {
  	namespace App {
  	}
  }
  
  ...
Refactored and overwrote C:/Users/HJN/Desktop/open-webui - Copy/app/src\app.d.ts
Processing file 2/10: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\constants.ts
Processing entire file: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\constants.ts
Sending request to Groq API: Response received.


Refactoring Files:  20%|██        | 2/10 [00:04<00:20,  2.59s/file]

Refactored content preview (first few lines):
  import os
  
  APP_NAME = 'RIC'
  
  def get_webui_hostname():
  ...
Refactored and overwrote C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\constants.ts
Processing file 3/10: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\index.ts
Processing entire file: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\index.ts
Sending request to Groq API: Response received.


Refactoring Files:  30%|███       | 3/10 [00:07<00:19,  2.82s/file]

Refactored content preview (first few lines):
  import logging
  import os
  from typing import Dict, List
  
  class Configuration:
  ...
Refactored and overwrote C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\index.ts
Processing file 4/10: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\index.ts
Processing entire file: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\index.ts
Sending request to Groq API: Attempt 1 failed: Error code: 413 - {'error': {'message': 'Request too large for model `llama-3.3-70b-versatile` in organization `org_01j1qez3f7fj98shsdn8gf3qsm` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 6623, please reduce your message size and try again. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}. Retrying in 2 seconds...
Sending request to Groq API: Attempt 2 failed: Error code: 413 - {'error': {'message': 'Request too large for model `llama-3.3-70b-ve

Refactoring Files:  40%|████      | 4/10 [00:13<00:22,  3.74s/file]

Error processing C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\index.ts: Failed after 3 attempts: Error code: 413 - {'error': {'message': 'Request too large for model `llama-3.3-70b-versatile` in organization `org_01j1qez3f7fj98shsdn8gf3qsm` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 6623, please reduce your message size and try again. Visit https://console.groq.com/docs/rate-limits for more information.', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
Processing file 5/10: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\audio\index.ts
Processing entire file: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\audio\index.ts
Sending request to Groq API: Response received.


Refactoring Files:  50%|█████     | 5/10 [00:16<00:18,  3.78s/file]

Refactored content preview (first few lines):
  import requests
  from typing import Dict, List, Optional
  from urllib.parse import urljoin
  
  AUDIO_API_BASE_URL = "https://api.example.com/audio"
  ...
Refactored and overwrote C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\audio\index.ts
Processing file 6/10: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\auths\index.ts
Processing entire file: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\auths\index.ts
Sending request to Groq API: Response received.


Refactoring Files:  60%|██████    | 6/10 [00:31<00:29,  7.34s/file]

Refactored content preview (first few lines):
  import { WEBUI_API_BASE_URL } from '$lib/constants';
  
  /**
   * Retrieves the admin details.
   * 
  ...
Refactored and overwrote C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\auths\index.ts
Processing file 7/10: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\channels\index.ts
Processing entire file: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\channels\index.ts
Sending request to Groq API: Response received.


Refactoring Files:  70%|███████   | 7/10 [00:35<00:19,  6.44s/file]

Refactored content preview (first few lines):
  import requests
  
  class API:
      def __init__(self, base_url):
          self.base_url = base_url
  ...
Refactored and overwrote C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\channels\index.ts
Processing file 8/10: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\chats\index.ts
Processing entire file: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\chats\index.ts
Sending request to Groq API: Response received.


Refactoring Files:  80%|████████  | 8/10 [00:54<00:20, 10.21s/file]

Refactored content preview (first few lines):
  import requests
  from urllib.parse import urlencode
  from typing import Optional
  
  WEBUI_API_BASE_URL = "https://api.example.com"
  ...
Refactored and overwrote C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\chats\index.ts
Processing file 9/10: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\configs\index.ts
Processing entire file: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\configs\index.ts
Sending request to Groq API: Response received.


Refactoring Files:  90%|█████████ | 9/10 [00:59<00:08,  8.66s/file]

Refactored content preview (first few lines):
  import requests
  import json
  
  WEBUI_API_BASE_URL = "https://example.com/api"
  
  ...
Refactored and overwrote C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\configs\index.ts
Processing file 10/10: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\evaluations\index.ts
Processing entire file: C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\evaluations\index.ts
Sending request to Groq API: Response received.


Refactoring Files: 100%|██████████| 10/10 [01:20<00:00,  8.06s/file]

Refactored content preview (first few lines):
  import requests
  from typing import Optional
  from urllib.parse import urljoin
  
  class EvaluationClient:
  ...
Refactored and overwrote C:/Users/HJN/Desktop/open-webui - Copy/app/src\lib\apis\evaluations\index.ts
Processed 10 files this run. 359 files remain unprocessed.
Refactoring batch complete! Check the results and run again for the next batch.



