In [26]:
def is_user_modified(file_path, exclude):
    """
    Heuristic: Consider files with standard filenames (e.g. package.json, package-lock.json,
    yarn.lock, README.md) as standard. Also, if any directory in the file path is one of the 
    standard folders (node_modules, .git) or in the exclude list, treat the file as standard.
    Everything else is assumed to be user modified.
    """
    standard_names = ["package.json", "package-lock.json", "yarn.lock", "README.md"] + exclude
    base_name = os.path.basename(file_path)
    if base_name in standard_names:
        return False

    # If any directory in the file path is known to be standard, treat it as standard.
    for part in file_path.split(os.sep):
        if part in ["node_modules", ".git"] + exclude:
            return False
    return True

    
def get_tree_children(dir_path, exclude=[], prefix="", allowed_extensions=None):
    """
    Recursively collects lines representing the folder tree structure (in markdown style)
    and a list of user modified code file paths (with full paths). Only includes directories
    that are not standard and files that have an allowed code extension and are user modified.
    """
    print("start of get_tree_children")
    if allowed_extensions is None:
        allowed_extensions = {
            ".html", ".htm", ".js", ".jsx", ".ts", ".tsx", ".css",
            ".py", ".java", ".c", ".cpp", ".cs", ".php", ".rb", ".go", ".rs", ".swift", ".kt",".md"
        }
    
    lines = []
    file_list = []
    
    try:
        items = sorted(os.listdir(dir_path))
        print("items ",items)
    except Exception as e :
        print("Exception in get_tree_children",e)
        return lines, file_list

    # Filter items: directories that are not standard, and files with allowed extensions.
    valid_items = []
    for item in items:
        item_path = os.path.join(dir_path, item)
        if os.path.isdir(item_path):
            if item in ["node_modules", ".git"] + exclude:
                continue
            valid_items.append(item)
        else:
            ext = os.path.splitext(item)[1].lower()
            if ext in allowed_extensions and is_user_modified(item_path, exclude):
                valid_items.append(item)

    count = len(valid_items)
    for idx, item in enumerate(valid_items):
        is_last = (idx == count - 1)
        connector = "└── " if is_last else "├── "
        item_path = os.path.join(dir_path, item)
        if os.path.isdir(item_path):
            lines.append(prefix + connector + item + "/")
            new_prefix = prefix + ("    " if is_last else "│   ")
            subtree_lines, subtree_files = get_tree_children(item_path, exclude, new_prefix, allowed_extensions)
            lines.extend(subtree_lines)
            file_list.extend(subtree_files)
        else:
            lines.append(prefix + connector + item)
            file_list.append(item_path)
    print("end of get_tree_children")
    return lines, file_list

def get_folder_structure_tree(root_dir, exclude=[], allowed_extensions=None):
    """
    Generates a markdown-friendly folder tree and a list of full file paths for user modified
    code files in the order printed. Only non-standard directories and files (with allowed extensions)
    are included.
    """
    base = os.path.basename(root_dir.rstrip(os.sep))
    print(base)
    tree_lines = [base + "/"]
    subtree_lines, file_list = get_tree_children(root_dir, exclude, prefix="", allowed_extensions=allowed_extensions)
    tree_lines.extend(subtree_lines)
    return tree_lines, file_list

In [29]:
import os
# Example usage:
bp = "F:/office_work/Prompt Store/docs"
tree_lines, file_paths = get_folder_structure_tree(bp, exclude=['hooks',"__pycache__"],allowed_extensions={".html", ".htm", ".js", ".jsx", ".ts", ".tsx", ".css",
            ".py", ".java", ".c", ".cpp", ".cs", ".php", ".rb", ".go", ".rs", ".swift", ".kt",".md"})
tree_lines = "\n".join(tree_lines)
print(tree_lines)

docs
start of get_tree_children
items  ['index.html', 'prompt_store_content.xlsx', 'script.js', 'style.css']
end of get_tree_children
docs/
├── index.html
├── script.js
└── style.css


In [30]:
file_choices = {"/".join(fp.split("\\")[1:]):fp for fp in file_paths}
file_choices

{'index.html': 'F:/office_work/Prompt Store/docs\\index.html',
 'script.js': 'F:/office_work/Prompt Store/docs\\script.js',
 'style.css': 'F:/office_work/Prompt Store/docs\\style.css'}

In [39]:

# Simple multi-select UI
import ipywidgets as widgets
from IPython.display import display


def load_and_count(path):
    with open(path, "r", encoding="utf-8") as f:
        text = f.read()
    L =  len(text.split(" "))
    L = round(L/1000,2)
    return L

options_with_counts = []
for name, content in file_choices.items():
    wc = load_and_count(content)
    label = f"{name} {wc}"
    options_with_counts.append((label, name))


multi = widgets.SelectMultiple(
    options=options_with_counts,
    value=list(file_choices.keys()),                      # preselected values if you want
    description='Files :',
    rows=8,
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='320px')
)

done_btn = widgets.Button(description='Done', button_style='primary')
out = widgets.Output()

def on_done(_):
    with out:
        out.clear_output()
        print("You selected:", list(multi.value))

done_btn.on_click(on_done)

display(widgets.VBox([multi, done_btn, out]))

VBox(children=(SelectMultiple(description='Files :', index=(0, 1, 2), layout=Layout(width='320px'), options=((…

In [40]:
#choose_file = 'routers/subscriptions.py'

selected_files = list(multi.value)
def get_file_content(file_path):    
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            content = file.read()
    except Exception as e:
        content = f"Could not read file: {e}"   
    return content

def get_all_import_statements(file_content):
    import_statements = []
    for line in file_content.split("\n"):
        if line.strip().startswith("import") or line.strip().startswith("from"):
            import_statements.append(line.strip())
    return import_statements

all_files_to_consider = selected_files.copy()
print(all_files_to_consider)

['index.html', 'script.js', 'style.css']


In [42]:
final_output = ""
separator = "-" * 80  # A line of dashes for separation
contents = []
file_paths_new = []
# Process each file in the list
for file_path in all_files_to_consider:
    file_path = bp+"/app/"+file_path
    # Compute the relative path from bp and normalize directory separators
    relative_path = os.path.relpath(file_path, bp)
    relative_path = relative_path.replace('\\', '/')
    
    # Split into directory and base file name
    dir_part = os.path.dirname(relative_path)
    base_name = os.path.basename(relative_path)
    
    # Prefix 'a' to the base file name
    modified_base =  base_name
    
    # If there is a directory part, join it with the modified base name
    display_name = f"{dir_part}/{modified_base}" if dir_part else modified_base
    
    # Try reading the file content
  #  print("file_path ",file_path)
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            content = file.read()
            file_paths_new.append(file_path)
            contents.append(content)
    except Exception as e:
        content = f"Could not read file: {e}"
    
    # Append the formatted block to final_output
    final_output += f"File Path : {display_name}\n\n"
    final_output += "File Content :\n"
    final_output += content + "\n"
    final_output += separator + "\n"


aftc_str = ""
for aftc in all_files_to_consider:
    aftc_str += "app/"+aftc+"\n"
    

final_output = """
Context :
I have created a web application, where I am using html, js and css

Folder structure:
"""+tree_lines+"""


Files whose codebases are given :
"""+aftc_str +"""

"""+separator+"""

"""+final_output
# Print the final formatted output
print(final_output)


Context :
I have created a web application, where I am using html, js and css

Folder structure:
docs/
├── index.html
├── script.js
└── style.css


Files whose codebases are given :
app/index.html
app/script.js
app/style.css


--------------------------------------------------------------------------------

File Path : app/index.html

File Content :
Could not read file: [Errno 2] No such file or directory: 'F:/office_work/Prompt Store/docs/app/index.html'
--------------------------------------------------------------------------------
File Path : app/script.js

File Content :
Could not read file: [Errno 2] No such file or directory: 'F:/office_work/Prompt Store/docs/app/script.js'
--------------------------------------------------------------------------------
File Path : app/style.css

File Content :
Could not read file: [Errno 2] No such file or directory: 'F:/office_work/Prompt Store/docs/app/style.css'
-------------------------------------------------------------------------------

In [43]:
print("Lines : ",len(final_output.split("\n"))) 
print("Words : ",len(final_output.split(" ")))
print("Tokens (k): ",round(len(final_output.split(" "))*(4/3000),1))

Lines :  35
Words :  75
Tokens (k):  0.1
