In [7]:
#!/usr/bin/env python3
"""
Script to refactor Python files by processing top-level 'def' or 'class' chunks one at a time with LLM streaming.
Tracks processed files and saves refactored code back to the original files.
Allows skipping specified subfolders. Focuses on docstrings, formatting, optimization, error handling, and cleanup.
"""

import ast
import json
import os
import re
import requests
import time
from typing import List, Set, Tuple

import chardet
from tqdm import tqdm

# Constants
DEFAULT_DIRECTORY = r"C:\Users\harold.noble\Desktop\open-webui\app\backend\webui\storage"
SKIP_FOLDERS = {".github", "code_helper", "ollama-0"}
PROCESSED_FILES_TRACKER = "processed_files.txt"
FILES_PER_RUN = 10
LLM_MODEL = "qwen2.5-coder:14b"
LLM_TEMPERATURE = 0.5
LLM_TOP_P = 0.9
DEFAULT_OLLAMA_PORT = "11434"
TIMEOUT_SECONDS = 60 * 3
MAX_RETRIES = 3

# System Prompt for LLM
SYSTEM_PROMPT = """
You are an expert Python developer specializing in code refactoring, created to assist with optimizing Python scripts. Your role is to refactor Python code chunks—typically a single top-level function (def) or class—while preserving their original functionality.

IMPORTANT: Your response must contain ONLY the refactored Python code with no explanations, comments about changes, or formatting markers. Do not include markdown code blocks, explanations, or anything else that is not part of the actual Python code.
"""

# User Prompt Template
REFACTOR_PROMPT = """
Refactor this Python script chunk with the following improvements:

1. Optimize performance without sacrificing readability—simplify logic, remove inefficiencies, and streamline operations.
2. Remove unused imports and dead code (do not remove i18n references unless explicitly unused).
3. Format according to Black's style (88-character line length, consistent indentation, sorted imports alphabetically).
4. Enhance error handling with specific exceptions (e.g., ValueError, IOError) and actionable messages where missing.
5. Replace comments with PEP 257-compliant docstrings for modules, functions, and classes; retain # for inline notes not suited to docstrings.
6. Improve function and class names defined in this chunk to better reflect their purpose, using snake_case. Do not rename variables, functions, or classes that are not defined within this chunk (e.g., imported names).

IMPORTANT: Respond with ONLY the refactored Python code, without explanations, markdown code blocks, or comments about the changes. Return just the code itself.

Script chunk:
{script_content}
"""


def fetch_llm_response(
    prompt: str,
    system_prompt: str = SYSTEM_PROMPT,
    model: str = LLM_MODEL,
    temperature: float = LLM_TEMPERATURE,
    top_p: float = LLM_TOP_P,
    retries: int = MAX_RETRIES,
) -> str:
    """Stream response from the LLM API and return the full refactored chunk."""
    if not prompt or not isinstance(prompt, str):
        raise ValueError("Prompt must be a non-empty string")

    ollama_port = os.getenv("OLLAMA_PORT", DEFAULT_OLLAMA_PORT)
    url = f"http://localhost:{ollama_port}/api/generate"
    payload = {
        "model": model,
        "system": system_prompt,
        "prompt": prompt,
        "stream": True,  # Enable streaming for live monitoring
        "temperature": temperature,
        "top_p": top_p,
    }
    headers = {"Content-Type": "application/json"}

    full_response = ""
    for attempt in range(retries):
        try:
            response = requests.post(url, json=payload, headers=headers, timeout=TIMEOUT_SECONDS, stream=True)
            response.raise_for_status()
            print("Streaming LLM response: ", end="", flush=True)
            for line in response.iter_lines():
                if line:
                    chunk = line.decode("utf-8")
                    try:
                        data = json.loads(chunk)
                        if "response" in data:
                            print(data["response"], end="", flush=True)
                            full_response += data["response"]
                    except json.JSONDecodeError as e:
                        print(f"\nWarning: Failed to parse chunk: {e}", flush=True)
            print()  # Newline after streaming completes

            # Process the response to extract only the Python code
            clean_response = extract_python_code(full_response)
            return clean_response

        except requests.RequestException as e:
            if attempt == retries - 1:
                raise requests.RequestException(f"Failed after {retries} attempts: {e}")
            print(f"Attempt {attempt + 1} failed: {e}. Retrying in 2 seconds...")
            time.sleep(2)


def extract_python_code(response: str) -> str:
    """
    Extract only the Python code from the LLM response, removing any explanations,
    markdown code blocks, or other non-code content.
    """
    # First, try to extract code from markdown code blocks if they exist
    code_block_pattern = r"```(?:python)?(.*?)```"
    code_blocks = re.findall(code_block_pattern, response, re.DOTALL)

    if code_blocks:
        # Join all code blocks (in case there are multiple)
        return "\n\n".join(block.strip() for block in code_blocks)

    # If no code blocks, try to identify response sections
    explanation_markers = [
        "Here's the refactored", "The refactored", "I've refactored",
        "Changes made:", "### Changes", "## Changes", "# Changes",
        "Explanation:", "### Explanation", "## Explanation", "# Explanation",
        "Summary of changes:", "In summary,", "To summarize,"
    ]

    lines = response.split('\n')
    code_lines = []
    in_code_section = True

    for line in lines:
        # Check if this line looks like the start of an explanation
        if any(marker.lower() in line.lower() for marker in explanation_markers):
            in_code_section = False
            continue

        if in_code_section:
            code_lines.append(line)

    # If we managed to filter out explanations, return the code
    if code_lines:
        return "\n".join(code_lines)

    # If all else fails, return the original response
    # The validity check will catch if this isn't proper Python
    return response


def load_processed_files(tracker_file: str) -> Set[str]:
    """Load the set of already processed file paths from a tracker file."""
    processed = set()
    if os.path.exists(tracker_file):
        with open(tracker_file, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if line and not line.startswith("#"):
                    processed.add(line)
    return processed


def save_processed_files(tracker_file: str, processed: Set[str]) -> None:
    """Save the set of processed file paths to a tracker file."""
    with open(tracker_file, "w", encoding="utf-8") as f:
        f.write("# processed_files.txt\n")
        f.write("# Tracks files already refactored\n\n")
        for file_path in sorted(processed):
            f.write(f"{file_path}\n")


def split_into_top_level_chunks(script_content: str) -> List[Tuple[int, int, str]]:
    """
    Split script into top-level 'def' or 'class' chunks with their start/end line numbers.

    Returns a list of tuples containing (start_line, end_line, chunk_text).
    """
    lines = script_content.splitlines()

    # Find all top-level classes and function definitions
    chunks = []
    current_chunk_start = None
    in_multiline_string = False
    string_delimiter = None

    # Get non-chunk content at the beginning of the file
    non_chunk_lines = []

    for i, line in enumerate(lines):
        stripped = line.strip()

        # Skip empty lines and comments at file beginning
        if not stripped or stripped.startswith('#'):
            if current_chunk_start is None:
                non_chunk_lines.append(line)
            continue

        # Handle multiline strings
        if not in_multiline_string:
            # Check for start of multiline string
            if ('"""' in line or "'''" in line):
                # Determine which delimiter is used
                if '"""' in line and "'''" in line:
                    # Both appear, find the first one
                    if line.find('"""') < line.find("'''"):
                        string_delimiter = '"""'
                    else:
                        string_delimiter = "'''"
                elif '"""' in line:
                    string_delimiter = '"""'
                else:
                    string_delimiter = "'''"

                # Check if the multiline string is closed on the same line
                if line.count(string_delimiter) % 2 != 0:
                    in_multiline_string = True
        else:
            # Check for end of multiline string
            if string_delimiter in line:
                in_multiline_string = False
            continue  # Skip processing this line further if in multiline string

        # Detect the start of a new top-level chunk
        if (line.startswith('def ') or line.startswith('class ')) and current_chunk_start is None:
            # Store any non-chunk content at beginning of file
            if non_chunk_lines and i > 0:
                header_content = "\n".join(non_chunk_lines)
                chunks.append((0, i - 1, header_content))
                non_chunk_lines = []

            current_chunk_start = i
        # Detect the end of the current chunk and the start of a new one
        elif (line.startswith('def ') or line.startswith('class ')) and current_chunk_start is not None:
            # Add the previous chunk
            chunk_content = "\n".join(lines[current_chunk_start:i])
            chunks.append((current_chunk_start, i - 1, chunk_content))
            current_chunk_start = i

    # Add the final chunk if there is one
    if current_chunk_start is not None:
        chunk_content = "\n".join(lines[current_chunk_start:])
        chunks.append((current_chunk_start, len(lines) - 1, chunk_content))
    # Or add any non-chunk content if the file has no chunks
    elif non_chunk_lines:
        header_content = "\n".join(non_chunk_lines)
        chunks.append((0, len(lines) - 1, header_content))

    return chunks


def is_valid_python(code: str) -> bool:
    """Check if the code is syntactically valid Python."""
    try:
        ast.parse(code)
        return True
    except SyntaxError:
        return False


def process_single_file(file_path: str) -> bool:
    """Process a single file by refactoring top-level 'def' or 'class' chunks one at a time."""
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            original_content = f.read()
    except UnicodeDecodeError:
        try:
            with open(file_path, "rb") as f:
                raw = f.read()
                detected = chardet.detect(raw)
                encoding = detected["encoding"] or "latin1"
            with open(file_path, "r", encoding=encoding) as f:
                original_content = f.read()
            print(f"Warning: {file_path} decoded with fallback encoding '{encoding}'")
        except Exception as e:
            print(f"Error decoding {file_path}: {e}")
            return False
    except OSError as e:
        print(f"Error reading {file_path}: {e}")
        return False

    chunks = split_into_top_level_chunks(original_content)
    if not chunks:
        print(f"Skipping {file_path}: No top-level 'def' or 'class' chunks found.")
        return False

    print(f"Found {len(chunks)} top-level chunks in {file_path}")
    modified_content = original_content

    with tqdm(total=len(chunks), desc=f"Refactoring {os.path.basename(file_path)}", unit="chunk") as pbar:
        for i, (start_line, end_line, chunk_content) in enumerate(chunks, 1):
            # Skip non-function/class chunks (like file headers, imports, etc.)
            if not chunk_content.lstrip().startswith(('def ', 'class ')):
                tqdm.write(f"Skipping non-function/class chunk {i}/{len(chunks)} (lines {start_line + 1}-{end_line + 1})")
                pbar.update(1)
                continue

            tqdm.write(f"Processing chunk {i}/{len(chunks)} (lines {start_line + 1}-{end_line + 1})")
            prompt = REFACTOR_PROMPT.format(script_content=chunk_content)

            try:
                refactored_chunk = fetch_llm_response(prompt)

                # Debug output to understand what the refactored code looks like
                tqdm.write(f"Refactored chunk preview (first few lines):")
                preview_lines = refactored_chunk.split('\n')[:5]
                for line in preview_lines:
                    tqdm.write(f"  {line}")
                if len(preview_lines) < len(refactored_chunk.split('\n')):
                    tqdm.write("  ...")

                if not is_valid_python(refactored_chunk):
                    tqdm.write(f"Error: Refactored chunk {i} is invalid Python. Skipping this chunk.")
                    tqdm.write(f"First 200 characters of invalid chunk: {refactored_chunk[:200]}")
                    pbar.update(1)
                    continue

                # Replace the chunk in the content while preserving line numbers
                original_lines = modified_content.splitlines()
                refactored_lines = refactored_chunk.splitlines()

                # Create new content by replacing the chunk
                new_lines = original_lines[:start_line] + refactored_lines + original_lines[end_line + 1:]
                new_content = "\n".join(new_lines)

                # Validate the combined code
                if not is_valid_python(new_content):
                    tqdm.write(f"Error: Combined code after chunk {i} is invalid Python. Reverting this chunk.")
                    continue  # Keep the original content

                # Update the modified content
                modified_content = new_content

            except requests.RequestException as e:
                tqdm.write(f"Error processing chunk {i}: {e}. Skipping this chunk.")
            except Exception as e:
                tqdm.write(f"Unexpected error processing chunk {i}: {e}. Skipping this chunk.")

            pbar.update(1)

    # Ensure trailing newline
    if not modified_content.endswith("\n"):
        modified_content += "\n"

    # Final validation
    if not is_valid_python(modified_content):
        print(f"Error: Final refactored code for {file_path} is invalid Python. Keeping original.")
        return False

    # Check if content actually changed
    if modified_content == original_content:
        print(f"No changes were made to {file_path}")
        return False

    try:
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(modified_content)
        print(f"Refactored and overwrote {file_path}")
        return True
    except OSError as e:
        print(f"Error writing to {file_path}: {e}")
        return False


def get_python_files(directory: str) -> List[str]:
    """Recursively find all Python files in the directory, excluding SKIP_FOLDERS."""
    python_files = []
    for root, dirs, files in os.walk(directory):
        dirs[:] = [d for d in dirs if d not in SKIP_FOLDERS]
        for file in files:
            if file.endswith(".py"):
                full_path = os.path.join(root, file)
                python_files.append(full_path)
    return python_files


def clear_terminal() -> None:
    """Clear the terminal screen."""
    os.system("cls" if os.name == "nt" else "clear")


def main() -> None:
    """Process up to FILES_PER_RUN Python scripts with a progress bar."""
    if not os.path.exists(DEFAULT_DIRECTORY):
        print(f"Error: Directory {DEFAULT_DIRECTORY} does not exist.")
        return

    processed_files = load_processed_files(PROCESSED_FILES_TRACKER)
    python_files = get_python_files(DEFAULT_DIRECTORY)
    if not python_files:
        print(f"No Python files found in {DEFAULT_DIRECTORY} (excluding {SKIP_FOLDERS}).")
        return

    remaining_files = [f for f in python_files if f not in processed_files]
    if not remaining_files:
        print("All files have already been processed.")
        return

    files_to_process = remaining_files[:FILES_PER_RUN]
    print(f"Found {len(remaining_files)} unprocessed files. Processing {len(files_to_process)} this run.")
    print(f"Skipping folders: {SKIP_FOLDERS}")

    with tqdm(total=len(files_to_process), desc="Refactoring Files", unit="file") as pbar:
        for i, file_path in enumerate(files_to_process, 1):
            tqdm.write(f"Processing file {i}/{len(files_to_process)}: {file_path}")
            if process_single_file(file_path):
                processed_files.add(file_path)
                save_processed_files(PROCESSED_FILES_TRACKER, processed_files)
            pbar.update(1)
            # Don't clear terminal to help with debugging
            # clear_terminal()

    remaining = len(remaining_files) - len(files_to_process)
    print(f"Processed {len(files_to_process)} files this run. {remaining} files remain unprocessed.")
    print("Refactoring batch complete! Check the results and run again for the next batch.")


if __name__ == "__main__":
    main()

Found 1 unprocessed files. Processing 1 this run.
Skipping folders: {'ollama-0', 'code_helper', '.github'}


Refactoring Files:   0%|          | 0/1 [00:00<?, ?file/s]

Processing file 1/1: C:\Users\harold.noble\Desktop\open-webui\app\backend\webui\storage\provider.py
Found 7 top-level chunks in C:\Users\harold.noble\Desktop\open-webui\app\backend\webui\storage\provider.py



Refactoring Files:   0%|          | 0/1 [00:00<?, ?file/s]       
Refactoring Files:   0%|          | 0/1 [00:00<?, ?file/s]       

Skipping non-function/class chunk 1/7 (lines 1-31)
Processing chunk 2/7 (lines 32-49)
Streaming LLM response: ```python
from abc import ABC, abstractmethod
from typing import BinaryIO, Tuple


class storage_provider(ABC):
    """Abstract base class for storage providers."""

    @abstractmethod
    def get_file(self, file_path: str) -> str:
        """Retrieve a file from the storage.

        :param file_path: Path to the file.
        :return: Content of the file.
        :raises FileNotFoundError: If the file does not exist.
        """
        pass

    @abstractmethod
    def upload_file(
        self, file: BinaryIO, filename: str
    ) -> Tuple[bytes, str]:
        """Upload a file to the storage.

        :param file: File object containing the data to be uploaded.
        :param filename: Name of the file in the storage.
        :return: A tuple containing the file's bytes and its path in the storage.
        """
        pass

    @abstractmethod
    def delete_all_files(self)


Refactoring Files:   0%|          | 0/1 [00:08<?, ?file/s]                
Refactoring Files:   0%|          | 0/1 [00:08<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:08<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:08<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:08<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:08<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:08<?, ?file/s]               



Refactored chunk preview (first few lines):
  from abc import ABC, abstractmethod
  from typing import BinaryIO, Tuple
  
  
  class storage_provider(ABC):
  ...


Refactoring Files:   0%|          | 0/1 [00:08<?, ?file/s]               

Processing chunk 3/7 (lines 50-92)
Streaming LLM response: ```python
import os
import shutil
from typing import BinaryIO, Tuple

class LocalStorageProvider:
    @staticmethod
    def upload_file(file: BinaryIO, filename: str) -> Tuple[bytes, str]:
        """Uploads a file to the local storage.

        Args:
            file (BinaryIO): The binary stream of the file.
            filename (str): The name of the file.

        Returns:
            Tuple[bytes, str]: A tuple containing the file contents and the file path.

        Raises:
            ValueError: If the file content is empty.
        """
        contents = file.read()
        if not contents:
            raise ValueError("File content is empty.")
        file_path = os.path.join(UPLOAD_DIR, filename)
        with open(file_path, "wb") as f:
            f.write(contents)
        return contents, file_path

    @staticmethod
    def get_file(file_path: str) -> str:
        """Returns the path of the file from local storage.


Refactoring Files:   0%|          | 0/1 [00:24<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:24<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:24<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:24<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:24<?, ?file/s]               



Refactored chunk preview (first few lines):
  import os
  import shutil
  from typing import BinaryIO, Tuple
  


Refactoring Files:   0%|          | 0/1 [00:24<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:24<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:24<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:24<?, ?file/s]               

  class LocalStorageProvider:
  ...
Error: Combined code after chunk 3 is invalid Python. Reverting this chunk.
Processing chunk 4/7 (lines 93-165)
Streaming LLM response: ```python
import os
from typing import BinaryIO, Tuple
import boto3
from botocore.exceptions import ClientError

class S3StorageProvider(StorageProvider):
    def __init__(self):
        self.s3_client = boto3.client(
            "s3",
            region_name=S3_REGION_NAME,
            endpoint_url=S3_ENDPOINT_URL,
            aws_access_key_id=S3_ACCESS_KEY_ID,
            aws_secret_access_key=S3_SECRET_ACCESS_KEY,
        )
        self.bucket_name = S3_BUCKET_NAME
        self.key_prefix = S3_KEY_PREFIX if S3_KEY_PREFIX else ""

    def upload_file(self, file: BinaryIO, filename: str) -> Tuple[bytes, str]:
        """Uploads a file to S3 storage."""
        _, file_path = LocalStorageProvider.upload_file(file, filename)
        s3_key = os.path.join(self.key_prefix, filename)
        try:
            self.s3_cli


Refactoring Files:   0%|          | 0/1 [00:44<?, ?file/s]               
                                                                         
Refactoring provider.py:  29%|██▊       | 2/7 [00:44<00:22,  4.44s/chunk]


Refactored chunk preview (first few lines):
  import os


Refactoring Files:   0%|          | 0/1 [00:44<?, ?file/s]
Refactoring Files:   0%|          | 0/1 [00:44<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:44<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:44<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:44<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:44<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [00:44<?, ?file/s]               

  from typing import BinaryIO, Tuple
  import boto3
  from botocore.exceptions import ClientError
  
  ...
Error: Combined code after chunk 4 is invalid Python. Reverting this chunk.



Refactoring Files:   0%|          | 0/1 [00:44<?, ?file/s]               

Processing chunk 5/7 (lines 166-229)
Streaming LLM response: ```python
from typing import BinaryIO, Tuple
import json
from google.cloud import storage
from google.api_core.exceptions import NotFound

class gcs_storage_provider(StorageProvider):
    def __init__(self):
        self.bucket_name = GCS_BUCKET_NAME

        if GOOGLE_APPLICATION_CREDENTIALS_JSON:
            self.gcs_client = storage.Client.from_service_account_info(
                info=json.loads(GOOGLE_APPLICATION_CREDENTIALS_JSON)
            )
        else:
            # If no credentials json is provided, credentials will be picked up from the environment.
            # If running on local environment, credentials would be user credentials.
            # If running on a Compute Engine instance, credentials would be from Google Metadata server.
            self.gcs_client = storage.Client()
        self.bucket = self.gcs_client.bucket(GCS_BUCKET_NAME)

    def upload_file(self, file: BinaryIO, filename: str) -> Tuple[b


Refactoring Files:   0%|          | 0/1 [01:03<?, ?file/s]               
                                                                         


Refactored chunk preview (first few lines):
  from typing import BinaryIO, Tuple


Refactoring Files:   0%|          | 0/1 [01:03<?, ?file/s]
Refactoring Files:   0%|          | 0/1 [01:03<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [01:03<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [01:03<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [01:03<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [01:03<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [01:03<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [01:03<?, ?file/s]               

  import json
  from google.cloud import storage
  from google.api_core.exceptions import NotFound
  
  ...
Error: Combined code after chunk 5 is invalid Python. Reverting this chunk.
Processing chunk 6/7 (lines 230-297)
Streaming LLM response: ```python
from azure.storage.blob import BlobServiceClient, DefaultAzureCredential, ResourceNotFoundError
from typing import BinaryIO, Tuple

class azure_storage_provider(StorageProvider):
    def __init__(self):
        self.endpoint = AZURE_STORAGE_ENDPOINT
        self.container_name = AZURE_STORAGE_CONTAINER_NAME
        storage_key = AZURE_STORAGE_KEY

        if storage_key:
            self.blob_service_client = BlobServiceClient(account_url=self.endpoint, credential=storage_key)
        else:
            self.blob_service_client = BlobServiceClient(
                account_url=self.endpoint, credential=DefaultAzureCredential()
            )
        self.container_client = self.blob_service_client.get_container_client(self.container_name)


Refactoring Files:   0%|          | 0/1 [01:20<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [01:20<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [01:20<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [01:20<?, ?file/s]               
                                                                         


Refactored chunk preview (first few lines):
  from azure.storage.blob import BlobServiceClient, DefaultAzureCredential, ResourceNotFoundError
  from typing import BinaryIO, Tuple
  
  class azure_storage_provider(StorageProvider):


Refactoring Files:   0%|          | 0/1 [01:20<?, ?file/s]
Refactoring Files:   0%|          | 0/1 [01:20<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [01:20<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [01:20<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [01:20<?, ?file/s]               

      def __init__(self):
  ...
Error: Combined code after chunk 6 is invalid Python. Reverting this chunk.
Processing chunk 7/7 (lines 298-312)
Streaming LLM response: ```python
from typing import Union

class LocalStorageProvider:
    pass

class S3StorageProvider:
    pass

class GCSStorageProvider:
    pass

class AzureStorageProvider:
    pass

def get_storage_provider(storage_provider: str) -> Union[
    LocalStorageProvider, 
    S3StorageProvider, 
    GCSStorageProvider, 
    AzureStorageProvider
]:
    """
    Returns an instance of the storage provider based on the specified type.

    Args:
        storage_provider (str): The type of storage provider ('local', 's3', 'gcs', 'azure').

    Raises:
        ValueError: If the provided storage provider is not supported.
    """
    providers = {
        "local": LocalStorageProvider,
        "s3": S3StorageProvider,
        "gcs": GCSStorageProvider,
        "azure": AzureStorageProvider
    }
    
    if storage_provider not in


Refactoring Files:   0%|          | 0/1 [01:28<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [01:28<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [01:28<?, ?file/s]               
                                                                         
Refactoring provider.py:  29%|██▊       | 2/7 [01:28<00:22,  4.44s/chunk]


Refactored chunk preview (first few lines):
  from typing import Union
  
  class LocalStorageProvider:


Refactoring Files:   0%|          | 0/1 [01:29<?, ?file/s]
Refactoring Files:   0%|          | 0/1 [01:29<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [01:29<?, ?file/s]               
Refactoring Files:   0%|          | 0/1 [01:29<?, ?file/s]               
Refactoring provider.py:  29%|██▊       | 2/7 [01:29<03:42, 44.51s/chunk]

      pass
  
  ...
Error: Combined code after chunk 7 is invalid Python. Reverting this chunk.



Refactoring Files: 100%|██████████| 1/1 [01:29<00:00, 89.04s/file]

Refactored and overwrote C:\Users\harold.noble\Desktop\open-webui\app\backend\webui\storage\provider.py
Processed 1 files this run. 0 files remain unprocessed.
Refactoring batch complete! Check the results and run again for the next batch.



