In [None]:
import os
import re
import csv
from typing import Dict, List
from tqdm import tqdm  # Import tqdm for the progress bar


def parse_csv_file(csv_path: str) -> List[Dict[str, str]]:
    """Read a CSV file with a header row into a list of row dictionaries.

    Args:
        csv_path: Path to the CSV file. The first row supplies the keys.

    Returns:
        One dictionary per data row, keyed by the header names.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # 'utf-8-sig' strips a leading byte-order mark (as written by Excel) so the
    # first header is 'Element Name' rather than '\ufeffElement Name'; it reads
    # BOM-less UTF-8 unchanged.
    # newline='' is what the csv module requires so that line breaks embedded
    # in quoted fields reach the parser untouched.
    with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
        return list(csv.DictReader(f))


def read_file_safe(file_path: str) -> str:
    """Read a text file, falling back through several encodings.

    The encodings are tried from strictest to most permissive: UTF-8, then
    windows-1252, then ISO-8859-1.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded file content, or '' if the file could not be opened or
        read (an error message is printed in that case).
    """
    # ISO-8859-1 assigns a character to every possible byte and therefore never
    # raises UnicodeDecodeError; it has to come last or any encoding listed
    # after it would never be attempted.
    for encoding in ('utf-8', 'windows-1252', 'ISO-8859-1'):
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                return f.read()
        except UnicodeDecodeError:
            # Wrong guess; try the next, more permissive encoding.
            continue
        except OSError as e:
            # Missing file, permission problem, etc.: retrying with another
            # encoding cannot help, so report and give up.
            print(f"Error reading {file_path}: {e}")
            return ''

    # Defensive fallback; not expected to be reached because the final
    # encoding accepts all byte values.
    return ''


def rename_file(file_path: str, element_name: str, suggested_name: str) -> bool:
    """Rename a file whose name contains ``element_name``.

    Every occurrence of ``element_name`` in the file's base name is replaced
    with ``suggested_name``; the directory part is left unchanged.

    Args:
        file_path: Current path of the file.
        element_name: Substring to look for in the base name.
        suggested_name: Text that replaces each occurrence.

    Returns:
        True if the file was renamed, False if nothing was done (no match,
        empty ``element_name``, unchanged name, or the target already exists).
    """
    dir_path, base_name = os.path.split(file_path)

    # An empty search string is "in" every name, and str.replace('', x) would
    # insert x between all characters, so treat it as "no match".
    if not element_name or element_name not in base_name:
        return False

    new_file_path = os.path.join(dir_path, base_name.replace(element_name, suggested_name))
    if new_file_path == file_path:
        return False

    # os.rename silently replaces an existing target on POSIX; refuse rather
    # than destroy another file. The samefile check still allows case-only
    # renames on case-insensitive file systems, where the "existing" target is
    # the source file itself.
    if os.path.exists(new_file_path) and not os.path.samefile(file_path, new_file_path):
        print(f"Skipping rename of {file_path}: {new_file_path} already exists")
        return False

    os.rename(file_path, new_file_path)
    return True


def process_file(file_path: str, rename_map: Dict[str, str]) -> int:
    """Apply a set of renames to one file's name and content.

    For each ``element name -> suggested name`` pair, the file is renamed if
    its base name contains the element name, and every whole-word occurrence
    of the element name in the content is replaced. If the content changed it
    is written back, as UTF-8, to the file's final (possibly renamed) path.

    Args:
        file_path: Path of the file to process.
        rename_map: Mapping of element names to their suggested names.

    Returns:
        The number of replacements made in the file content. 0 is returned
        when the file is missing, empty, or unreadable; such files are not
        renamed either.
    """
    # The caller's listing may be stale (for example the file was renamed
    # while an earlier batch was processed); skip instead of failing.
    if not os.path.isfile(file_path):
        return 0

    content = read_file_safe(file_path)

    if not content:
        return 0  # Empty or unreadable file

    original_content = content
    changes_made = 0
    # Where the file currently lives; updated after each successful rename so
    # later renames and the final write target the real file.
    current_path = file_path

    for element_name, suggested_name in rename_map.items():
        # An empty element name would match every filename and every word
        # boundary; a missing suggestion cannot be substituted.
        if not element_name or suggested_name is None:
            continue

        # Rename file if element name is found in the filename
        if rename_file(current_path, element_name, suggested_name):
            dir_path, base_name = os.path.split(current_path)
            new_path = os.path.join(dir_path, base_name.replace(element_name, suggested_name))
            print(f"Renamed file: {current_path} to {os.path.basename(new_path)}")
            current_path = new_path

        # Replace element names inside the file content. A function is passed
        # as the replacement so the suggested name is inserted literally;
        # a plain string would have backslashes and group references expanded.
        pattern = fr'\b{re.escape(element_name)}\b'
        content, replacements = re.subn(
            pattern, lambda _match, _new=suggested_name: _new, content
        )
        changes_made += replacements

    if content != original_content:
        # Write to the current path: writing to the original path after a
        # rename would recreate the old file and leave the renamed one stale.
        with open(current_path, 'w', encoding='utf-8') as f:
            f.write(content)

    return changes_made


def _collect_files(repo_path: str) -> List[str]:
    """Return the paths of all files under repo_path, skipping config and .git folders."""
    files = []
    for root, dirnames, filenames in os.walk(repo_path):
        # Prune in place so os.walk never descends into excluded folders.
        # Only directory names below repo_path are tested, so a repository that
        # merely lives under some "...config..." path is not skipped wholesale.
        # .git is excluded because rewriting or renaming git's internal files
        # would corrupt the repository.
        dirnames[:] = [
            name for name in dirnames
            if 'config' not in name.lower() and name != '.git'
        ]
        files.extend(os.path.join(root, filename) for filename in filenames)
    return files


def process_csv(repo_path: str, input_csv_file: str, batch_size: int):
    """Apply the rename rules from a CSV file to a repository, batch by batch.

    The CSV must have 'Element Name' and 'Suggested Name' columns. Rules are
    sorted by element name and applied in batches of ``batch_size``; after
    each batch the user is asked on stdin whether to continue. Files inside
    folders whose name contains 'config', and inside '.git', are left alone.
    Progress and the total number of content replacements are printed;
    nothing is returned.

    Args:
        repo_path: Root directory whose files are renamed and rewritten.
        input_csv_file: Path to the CSV file holding the rename rules.
        batch_size: Number of rules applied per batch; must be at least 1.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    # Parse the CSV file
    try:
        rename_rules = parse_csv_file(input_csv_file)
    except Exception as e:
        print(f"Error reading CSV file: {e}")
        return

    # Every DictReader row carries the same header keys, so checking the first
    # row is enough to detect a missing column.
    required_columns = ('Element Name', 'Suggested Name')
    if rename_rules:
        missing = [col for col in required_columns if col not in rename_rules[0]]
        if missing:
            print(f"Error reading CSV file: missing column(s) {', '.join(missing)}")
            return

    # Drop incomplete rules: an empty element name would match everywhere, an
    # empty suggestion would delete the identifier, and None values (from
    # short rows) would break the sort below.
    usable_rules = [
        rule for rule in rename_rules
        if rule['Element Name'] and rule['Suggested Name']
    ]
    skipped = len(rename_rules) - len(usable_rules)
    if skipped:
        print(f"Skipped {skipped} rule(s) with a missing element or suggested name.")

    # Sort the rules by the "Element Name" column
    usable_rules.sort(key=lambda rule: rule['Element Name'])

    files = _collect_files(repo_path)
    if not files:
        print("No files found to process.")
        return

    total_changes = 0
    batch_starts = range(0, len(usable_rules), batch_size)

    # Process the CSV in batches with a progress bar
    with tqdm(total=len(batch_starts), desc="Processing Batches", unit="batch") as pbar:
        for start in batch_starts:
            batch = usable_rules[start:start + batch_size]
            rename_map = {rule['Element Name']: rule['Suggested Name'] for rule in batch}

            # Earlier batches may have renamed files, so refresh the listing
            # instead of reusing paths that may no longer exist.
            if start > 0:
                files = _collect_files(repo_path)

            # Process each file with the current batch of element names
            for file_path in tqdm(files, desc="Processing Files", unit="file"):
                total_changes += process_file(file_path, rename_map)

            pbar.update(1)  # Update the batch progress bar

            # Ask if the user wants to continue processing the next batch
            if start + batch_size < len(usable_rules):
                answer = input(f"\nProcessed {batch_size} replacements. Do you want to process the next batch? (y/n): ")
                if answer.strip().lower() != 'y':
                    break

    print(f"\nTotal replacements made: {total_changes}")


# Example usage
# if __name__ == "__main__":
#     # Prompt user for input
#     repo_path = input("Enter the path to your repository: ")
#     input_csv_file = input("Enter the path to your CSV file: ")
#     batch_size = int(input("Enter the batch size for processing (e.g., 10, 20): "))
#
#     # Start processing
#     process_csv(repo_path, input_csv_file, batch_size)


# Hard-coded run settings for this cell (nothing is prompted for here).
repo_path = r"C:\Users\harold.noble\Desktop\Not workling\ric_new - Copy - Copy\ric"
input_csv_file = r"C:\Users\harold.noble\Desktop\Not workling\ric_new - Copy - Copy\code_helper\work\processed_output_file.csv"
batch_size = 50

# Run the batch rename over the repository with the settings above.
process_csv(
    repo_path=repo_path,
    input_csv_file=input_csv_file,
    batch_size=batch_size,
)

In [None]:
# Hard-coded run settings for this cell (nothing is prompted for here).
repo_path = r"C:\Users\harold.noble\Desktop\Not workling\ric_new - Copy - Copy\ric"
input_csv_file = r"C:\Users\harold.noble\Desktop\Not workling\ric_new - Copy - Copy\code_helper\work\processed_output_file.csv"
batch_size = 50

# Run the batch rename over the repository with the settings above.
process_csv(
    repo_path=repo_path,
    input_csv_file=input_csv_file,
    batch_size=batch_size,
)