In [13]:
import os
import datetime


def is_code_file(filename):
    """Check if a file is a programming language file based on extension"""
    code_extensions = {
        '.py', '.java', '.cpp', '.c', '.h', '.js',
        '.html', '.css', '.php', '.rb', '.go', '.rs',
        '.ts', '.sql', '.sh', '.kt', '.swift', '.cs', '.svelte'
    }
    return any(filename.lower().endswith(ext) for ext in code_extensions)


def combine_code_files(repo_path, num_files, output_file="combined_code.txt", log_file="processed_files.log"):
    """
    Combine specified number of code files from repository into one text file
    Keeps track of processed files to avoid duplicates on subsequent runs
    Skips the 'code_helper' folder
    Shows progress and remaining files to process
    """
    # Load previously processed files
    processed_files = set()
    if os.path.exists(log_file):
        with open(log_file, 'r') as log:
            processed_files = set(line.strip() for line in log)

    # Get all files from repository, skipping code_helper folder
    all_files = []
    for root, _, files in os.walk(repo_path):
        # Skip if the path contains 'code_helper'
        if 'code_helper' in root.split(os.sep):
            continue
        for file in files:
            full_path = os.path.join(root, file)
            if (is_code_file(file) and
                full_path not in processed_files and
                    os.path.isfile(full_path)):
                all_files.append(full_path)

    # Display total files found and how many are left to process
    total_files = len(all_files)
    print(f"Found {total_files} code files to process")
    print(f"Previously processed: {len(processed_files)} files")
    print(f"Files left to process: {total_files}")

    # Check if we have enough files
    if total_files < num_files:
        print(f"Warning: Only {total_files} code files found, requested {num_files}")
        num_files = total_files

    # Process specified number of files
    files_processed = 0
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    output_filename = f"{output_file}_{timestamp}.txt"
    with open(output_filename, 'w', encoding='utf-8') as outfile:
        with open(log_file, 'a', encoding='utf-8') as logfile:
            for i, file_path in enumerate(all_files):
                if files_processed >= num_files:
                    break
                try:
                    # Show progress
                    print(f"Processing file {i+1}/{total_files}: {os.path.basename(file_path)}")
                    print(f"Files left to process: {total_files - (i+1)}")

                    with open(file_path, 'r', encoding='utf-8') as infile:
                        # Write file header
                        relative_path = os.path.relpath(file_path, repo_path)
                        outfile.write(f"\n{'='*50}\n")
                        outfile.write(f"File: {relative_path}\n")
                        outfile.write(f"{'='*50}\n\n")
                        # Write file contents
                        outfile.write(infile.read())
                        outfile.write("\n\n")
                        # Log processed file
                        logfile.write(f"{file_path}\n")
                        files_processed += 1
                except Exception as e:
                    print(f"Error processing {file_path}: {str(e)}")

    print(f"Processed {files_processed} files")
    print(f"Files remaining in repo: {total_files - files_processed}")
    print(f"Output written to {output_filename}")
    print(f"Processed files logged in {log_file}")


def main():

    # Get user input

    repo_path = r"C:\Users\harold.noble\Desktop\open-webui - Copy\app\src\lib"  # input("Enter the repository path: ")

    while not os.path.isdir(repo_path):

        print("Invalid path. Please enter a valid directory path.")

        repo_path = input("Enter the repository path: ")


    try:

        num_files = int(300) #input("Enter the number of files to combine: "))

        if num_files <= 0:

            raise ValueError("Number must be positive")

    except ValueError:

        print("Invalid number. Please enter a positive integer.")
        return


    # Run the combination

    combine_code_files(repo_path, num_files)



if __name__ == "__main__":
    main()

Found 33 code files to process
Previously processed: 316 files
Files left to process: 33
Processing file 1/33: Badge.svelte
Files left to process: 32
Processing file 2/33: Banner.svelte
Files left to process: 31
Processing file 3/33: Checkbox.svelte
Files left to process: 30
Processing file 4/33: CodeEditor.svelte
Files left to process: 29
Processing file 5/33: Collapsible.svelte
Files left to process: 28
Processing file 6/33: ConfirmDialog.svelte
Files left to process: 27
Processing file 7/33: DragGhost.svelte
Files left to process: 26
Processing file 8/33: Drawer.svelte
Files left to process: 25
Processing file 9/33: Dropdown.svelte
Files left to process: 24
Processing file 10/33: FileItem.svelte
Files left to process: 23
Processing file 11/33: FileItemModal.svelte
Files left to process: 22
Processing file 12/33: Folder.svelte
Files left to process: 21
Processing file 13/33: Image.svelte
Files left to process: 20
Processing file 14/33: ImagePreview.svelte
Files left to process: 19
Pr

In [None]:
REPO_DIR = r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src"

Dependency analysis written to dependency_analysis.txt
Processed 289 of 289 files in 22 groups


In [None]:
import os
import re
from collections import defaultdict

REPO_DIR = r"C:\Users\harold.noble\Desktop\RIC\app\frontend\src"
SKIP_DIR = os.path.join(REPO_DIR, r"lib\components\icons")
OUTPUT_FILE = "dependency_analysis.txt"


def extract_svelte_script(file_path):
    """Extract script content from a Svelte file."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
            script_match = re.search(r'<script[^>]*>(.*?)</script>', content, re.DOTALL)
            if script_match:
                return script_match.group(1).strip()
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return ""
    return ""


def normalize_import_path(import_path, file_dir):
    """Normalize import paths to absolute paths within the repo."""
    # Handle relative imports
    if import_path.startswith('./') or import_path.startswith('../'):
        # Resolve the path relative to the importing file
        abs_path = os.path.normpath(os.path.join(file_dir, import_path))
        # Convert to repo-relative path
        return os.path.relpath(abs_path, REPO_DIR).replace('\\', '/')

    # Handle absolute imports (from repo root)
    if import_path.startswith('/'):
        return import_path.lstrip('/')

    # Handle package imports (likely node_modules)
    return import_path


def get_imports(content, file_dir):
    """Extract and normalize all import paths from the content."""
    # Match both import ... from 'path' and import 'path' patterns
    import_patterns = [
        r'import.*from\s*[\'"](.+?)[\'"]',  # import X from 'path'
        r'import\s*[\'"](.+?)[\'"]'          # import 'path'
    ]

    normalized_imports = set()
    for pattern in import_patterns:
        imports = re.findall(pattern, content)
        for imp in imports:
            normalized_imports.add(normalize_import_path(imp, file_dir))

    return normalized_imports


def resolve_import_to_files(import_path, file_map):
    """Resolve an import path to actual file paths in the repo."""
    # Direct match
    if import_path in file_map:
        return [import_path]

    # Try adding extensions
    for ext in ['.ts', '.js', '.svelte']:
        with_ext = f"{import_path}{ext}"
        if with_ext in file_map:
            return [with_ext]

    # Try index files
    for ext in ['.ts', '.js', '.svelte']:
        index_path = f"{import_path}/index{ext}"
        if index_path in file_map:
            return [index_path]

    # Return empty list if no match found
    return []


def process_file(file_path):
    """Process a file to extract its content and imports."""
    file_dir = os.path.dirname(file_path)
    rel_file_path = os.path.relpath(file_path, REPO_DIR).replace('\\', '/')

    if file_path.endswith('.svelte'):
        content = extract_svelte_script(file_path)
    elif file_path.endswith(('.ts', '.js')):
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except Exception as e:
            print(f"Error reading {file_path}: {e}")
            return rel_file_path, "", set()
    else:
        return rel_file_path, "", set()

    imports = get_imports(content, file_dir)
    return rel_file_path, content, imports


def build_dependency_graph(file_info):
    """Build a graph of which files import which other files."""
    import_graph = defaultdict(set)

    # Create a map of normalized paths to file paths
    file_map = {}
    for file_path in file_info:
        rel_path = os.path.relpath(file_path, REPO_DIR).replace('\\', '/')
        file_map[rel_path] = file_path

    # Build the import graph
    for file_path, info in file_info.items():
        rel_path = os.path.relpath(file_path, REPO_DIR).replace('\\', '/')

        for import_path in info['imports']:
            resolved_files = resolve_import_to_files(import_path, file_map)

            for resolved in resolved_files:
                actual_file = file_map[resolved]
                import_graph[actual_file].add(file_path)

    return import_graph, file_map


def analyze_dependencies(output_file=OUTPUT_FILE):
    """Analyze dependencies and write results to file."""
    # Collect file info, skipping the icons directory
    file_info = {}
    processed_files = set()

    for root, _, files in os.walk(REPO_DIR):
        if root.startswith(SKIP_DIR):
            continue
        for file in files:
            if file.endswith(('.svelte', '.ts', '.js')):
                file_path = os.path.join(root, file)
                rel_path, content, imports = process_file(file_path)
                if content is not None:  # Include even empty content
                    file_info[file_path] = {
                        'content': content,
                        'imports': imports
                    }

    # Build dependency graph
    import_graph, file_map = build_dependency_graph(file_info)

    with open(output_file, 'w', encoding='utf-8') as out:
        group_num = 1

        # Continue until all files are processed
        while len(processed_files) < len(file_info):
            # Find files with lowest import count that haven't been processed
            unprocessed_files = {
                path: info for path, info in file_info.items()
                if path not in processed_files
            }

            if not unprocessed_files:
                break

            min_imports = min(len(info['imports']) for path, info in unprocessed_files.items())

            base_modules = {
                path: info for path, info in unprocessed_files.items()
                if len(info['imports']) == min_imports
            }

            if not base_modules:
                break

            # Add group marker
            out.write(f"## GROUP {group_num} - Files with {min_imports} imports\n\n")

            # Process each base module
            for base_path, info in base_modules.items():
                # Skip if already processed (safety check)
                if base_path in processed_files:
                    continue

                rel_base_path = os.path.relpath(base_path, REPO_DIR).replace('\\', '/')

                # Write the file name with a header and content
                out.write(f"### {rel_base_path}\n")
                out.write(f"{info['content']}\n\n")

                # Mark this file as processed
                processed_files.add(base_path)

                # Get files that import this one
                importing_files = list(import_graph.get(base_path, set()))

                # Process importing files if they exist
                for imp_path in importing_files:
                    if imp_path not in processed_files:  # Only include files not yet processed
                        rel_imp_path = os.path.relpath(imp_path, REPO_DIR).replace('\\', '/')
                        out.write(f"### {rel_imp_path}\n")
                        out.write(f"{file_info[imp_path]['content']}\n\n")

                        # Mark as processed
                        processed_files.add(imp_path)

            # Add separator between groups
            out.write(f"{'='*80}\n\n")
            group_num += 1

    print(f"Dependency analysis written to {output_file}")
    print(f"Processed {len(processed_files)} of {len(file_info)} files in {group_num-1} groups")


if __name__ == "__main__":
    analyze_dependencies()