In [6]:
import os
import datetime


def is_code_file(filename):
    """Return True if *filename* ends with a recognised source-code extension.

    The match is case-insensitive and looks only at the end of the name, so
    both bare file names and full paths are accepted.
    """
    # str.endswith accepts a tuple and tests every suffix in a single call.
    recognised_suffixes = (
        '.py', '.java', '.cpp', '.c', '.h', '.js',
        '.html', '.css', '.php', '.rb', '.go', '.rs',
        '.ts', '.sql', '.sh', '.kt', '.swift', '.cs', '.svelte',
    )
    lowered_name = filename.lower()
    return lowered_name.endswith(recognised_suffixes)


def combine_code_files(repo_path, num_files, output_file="combined_code.txt", log_file="processed_files.log"):
    """
    Combine up to ``num_files`` code files from a repository into one text file.

    Files already listed in ``log_file`` are skipped, so repeated runs continue
    where the previous run stopped. Directories named 'code_helper' are ignored.
    Progress and the number of remaining files are printed to stdout.

    Args:
        repo_path: Root directory that is scanned recursively.
        num_files: Maximum number of files to combine in this run. It is
            lowered to the number of unprocessed files found when fewer exist.
        output_file: Prefix of the output name; the file actually written is
            ``f"{output_file}_{timestamp}.txt"``.
        log_file: Text log holding one processed file path per line. It is read
            at start-up and appended to as files are combined.

    Returns:
        None. Results are written to the output file and the log file.
    """
    # Load previously processed files. The log is appended to as UTF-8 below,
    # so it is read back with the same encoding; blank lines are not entries.
    processed_files = set()
    if os.path.exists(log_file):
        with open(log_file, 'r', encoding='utf-8') as log:
            processed_files = {line.strip() for line in log if line.strip()}

    # Get all files from repository, skipping code_helper folder
    all_files = []
    for root, dirs, files in os.walk(repo_path):
        # Prune in place so os.walk never descends into a 'code_helper' tree.
        dirs[:] = [d for d in dirs if d != 'code_helper']
        # Still required when repo_path itself lies inside a 'code_helper' folder.
        if 'code_helper' in root.split(os.sep):
            continue
        for file in files:
            full_path = os.path.join(root, file)
            if (is_code_file(file)
                    and full_path not in processed_files
                    and os.path.isfile(full_path)):
                all_files.append(full_path)

    # Display total files found and how many are left to process
    total_files = len(all_files)
    print(f"Found {total_files} code files to process")
    print(f"Previously processed: {len(processed_files)} files")
    print(f"Files left to process: {total_files}")

    # Check if we have enough files
    if total_files < num_files:
        print(f"Warning: Only {total_files} code files found, requested {num_files}")
        num_files = total_files

    # Process specified number of files
    files_processed = 0
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    # NOTE: the suffix is appended to output_file exactly as given, so the
    # default produces "combined_code.txt_<timestamp>.txt". Kept unchanged so
    # existing consumers of that name keep working.
    output_filename = f"{output_file}_{timestamp}.txt"
    with open(output_filename, 'w', encoding='utf-8') as outfile, \
            open(log_file, 'a', encoding='utf-8') as logfile:
        for i, file_path in enumerate(all_files):
            if files_processed >= num_files:
                break
            try:
                # Show progress
                print(f"Processing file {i+1}/{total_files}: {os.path.basename(file_path)}")
                print(f"Files left to process: {total_files - (i+1)}")

                # Read the whole file BEFORE writing anything: if decoding or
                # reading fails, no orphan header is left in the output.
                with open(file_path, 'r', encoding='utf-8') as infile:
                    contents = infile.read()

                # Write file header
                relative_path = os.path.relpath(file_path, repo_path)
                outfile.write(f"\n{'='*50}\n")
                outfile.write(f"File: {relative_path}\n")
                outfile.write(f"{'='*50}\n\n")
                # Write file contents
                outfile.write(contents)
                outfile.write("\n\n")
                # Log processed file
                logfile.write(f"{file_path}\n")
                files_processed += 1
            except (OSError, ValueError) as e:
                # Best effort: report the failure and move on to the next file.
                # UnicodeDecodeError is a ValueError subclass, so files that
                # are not valid UTF-8 land here as well.
                print(f"Error processing {file_path}: {str(e)}")

    print(f"Processed {files_processed} files")
    print(f"Files remaining in repo: {total_files - files_processed}")
    print(f"Output written to {output_filename}")
    print(f"Processed files logged in {log_file}")


def main():
    """Script entry point: pick the repository and file count, then combine.

    The repository path and the file count are currently hard-coded. The
    interactive path prompt is used only as a fallback when the hard-coded
    directory does not exist. Returns None in every case.
    """
    # Hard-coded target; originally read with input("Enter the repository path: ").
    target_dir = r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src\lib\workers"

    # Keep asking until the path points to an existing directory.
    while True:
        if os.path.isdir(target_dir):
            break
        print("Invalid path. Please enter a valid directory path.")
        target_dir = input("Enter the repository path: ")

    # Hard-coded count; originally read with
    # input("Enter the number of files to combine: ").
    try:
        file_limit = int(300)
    except ValueError:
        file_limit = None

    # A value that failed to parse and a non-positive value are rejected alike.
    if file_limit is None or file_limit <= 0:
        print("Invalid number. Please enter a positive integer.")
        return

    # Run the combination
    combine_code_files(target_dir, file_limit)



# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

Found 3 code files to process
Previously processed: 46 files
Files left to process: 3
Processing file 1/3: kokoro.worker.ts
Files left to process: 2
Processing file 2/3: KokoroWorker.ts
Files left to process: 1
Processing file 3/3: pyodide.worker.ts
Files left to process: 0
Processed 3 files
Files remaining in repo: 0
Output written to combined_code.txt_20250307_162135.txt
Processed files logged in processed_files.log


Dependency analysis written to dependency_analysis.txt
Processed 289 of 289 files in 22 groups
