In [2]:
import os
import re
import tiktoken

# Bound ``encode`` method of the encoding tiktoken selects for "gpt-3.5-turbo".
encode = tiktoken.encoding_for_model("gpt-3.5-turbo").encode


def count_token(x):
    """Return the number of tokens ``encode`` produces for the text ``x``."""
    return len(encode(x))

def remove_ignored_dirs(dirs, ignore_dirs):
    """Remove every name listed in ``ignore_dirs`` from ``dirs`` in place.

    The list is mutated (slice assignment) rather than replaced so that, when
    ``dirs`` is the directory list yielded by ``os.walk``, the walk does not
    descend into the removed directories.
    """
    dirs[:] = [name for name in dirs if name not in ignore_dirs]


def generate_tree(start_path: str, ignore_dirs: list, indent_size: int = 1) -> str:
    """Render the directory structure below ``start_path`` as indented text.

    Args:
        start_path: Directory to walk. A trailing path separator is tolerated.
        ignore_dirs: Directory names to skip entirely (not listed, not entered).
        indent_size: Number of spaces added per nesting level.

    Returns:
        One line per directory (suffixed with ``/``) followed by one line per
        file it contains, joined with newlines. Every entry is included. An
        empty string is returned when the walk yields nothing.
    """
    tree_lines = []
    for root, dirs, files in os.walk(start_path):
        remove_ignored_dirs(dirs, ignore_dirs)
        # Depth is derived from the path relative to the start directory, so a
        # trailing separator or a repeated path fragment cannot skew it.
        rel_root = os.path.relpath(root, start_path)
        level = 0 if rel_root == os.curdir else rel_root.count(os.sep) + 1
        indent = ' ' * indent_size * level
        # normpath drops a trailing separator, which would make basename empty.
        dir_name = os.path.basename(os.path.normpath(root))
        tree_lines.append(f"{indent}{dir_name}/")
        sub_indent = ' ' * indent_size * (level + 1)
        tree_lines.extend(f"{sub_indent}{f}" for f in files)
    return '\n'.join(tree_lines)

# Maps a file extension (including the leading dot) to a language tag.
# Extensions that are not listed are handled by get_lang_from_extension, which
# falls back to the extension text without its first character.
FILE_EXTENSION_LANG_MAP = {
    ".py": "python",
    ".js": "javascript",
    ".ts": "typescript",
    ".java": "java",
    ".c": "c",
    ".cpp": "cpp",
    ".cs": "csharp",
    ".php": "php",
    ".rb": "ruby",
    ".swift": "swift",
    ".go": "go",
    ".r": "r",
    ".m": "objective-c",
    ".pl": "perl",
    ".md": "markdown",
    ".tsx": "typescript",
    ".jsx": "javascript",
}
# Maps a language tag to its comment prefix. process_file looks the tag up here
# and defaults to "//" for tags that are missing (e.g. "markdown").
COMMENT_SYMBOL_MAP = {
    "python": "#",
    "javascript": "//",
    "typescript": "//",
    "java": "//",
    "c": "//",
    "cpp": "//",
    "csharp": "//",
    "php": "//",
    "ruby": "#",
    "swift": "//",
    "go": "//",
    "r": "#",
    "objective-c": "//",
    "perl": "#",
    # No ".css" entry exists in FILE_EXTENSION_LANG_MAP; the "css" tag is only
    # reached through the extension fallback. Only the opening delimiter of a
    # block comment is stored here.
    "css": "/*"
}

def get_lang_from_extension(file_extension: str) -> str:
    """Return the language tag for a file extension.

    Args:
        file_extension: Extension including its leading dot (e.g. ``".py"``),
            or an empty string for files without an extension.

    Returns:
        The tag from ``FILE_EXTENSION_LANG_MAP``. The lookup ignores case, so
        ``".R"`` or ``".PY"`` resolve like their lowercase spellings. For an
        unknown extension the extension itself is returned, unchanged in case,
        without its first character (``""`` for an empty extension).
    """
    return FILE_EXTENSION_LANG_MAP.get(file_extension.lower(), file_extension[1:])

def process_file(root: str, file: str, relative_path: str, no_formatting: bool, print_token_counts: bool):
    """Read one file and package its text plus metadata for the combined output.

    Args:
        root: Directory that contains the file.
        file: File name; its extension selects the language tag and the
            comment symbol.
        relative_path: Path reported in the result and in error messages.
        no_formatting: When true, strip comments and squash the text onto a
            single line with single spaces.
        print_token_counts: When true, count the tokens of the text and of
            ``relative_path``; otherwise the count is reported as 0.

    Returns:
        A dict with the keys ``path``, ``token_count``, ``data``,
        ``comment_symbol`` and ``lang``, or ``None`` when the file cannot be
        opened or is not valid UTF-8.
    """
    lang = get_lang_from_extension(os.path.splitext(file)[-1])
    # Languages without an entry fall back to "//".
    comment_symbol = COMMENT_SYMBOL_MAP.get(lang, "//")

    try:
        with open(os.path.join(root, file), "r", encoding='utf-8') as infile:
            data = infile.read()
    except (UnicodeDecodeError, OSError):
        # Best effort: a binary file, broken symlink or permission problem
        # skips this one file instead of aborting the whole run.
        print(f"Error reading {relative_path}")
        return None

    if no_formatting:
        # Drop everything from the comment symbol to the end of each line.
        # The pattern is purely textual, so the symbol is also matched inside
        # string literals and URLs.
        comment_pattern = re.escape(comment_symbol) + '.*$'
        data = re.sub(comment_pattern, ' ', data, flags=re.MULTILINE)
        # Empty out lines that contain only spaces or tabs.
        data = re.sub(r'(?<=\n)[ \t]*(?=\n)', '', data)
        # remove all newlines
        data = re.sub(r'\n', ' ', data)
        # Collapse runs of semicolons, then runs of spaces.
        data = re.sub(r';+', '; ', data)
        data = re.sub(r' +', ' ', data)

    if print_token_counts:
        token_count = count_token(data) + count_token(relative_path)
    else:
        token_count = 0

    return {
        "path": relative_path,
        "token_count": token_count,
        "data": data,
        "comment_symbol": comment_symbol,
        "lang": lang
    }

def walk_files(config: dict):
    """Collect the selected files below ``config['path']`` into one text file.

    The output starts with a total token count, followed by the directory tree
    and then one fenced code block per file, ordered by file extension.

    Args:
        config: Settings dict with these keys:
            ``path`` - directory to walk.
            ``output_file`` - file the result is written to.
            ``target_files`` - file names to include; empty means all.
            ``allowed_extensions`` - str or tuple of suffixes to include;
                empty means all.
            ``ignored_extensions`` - str or tuple of suffixes to skip; applied
                whether or not ``allowed_extensions`` is set.
            ``ignore_dirs`` - directory names that are never entered.
            ``ignore_files`` - file names that are always skipped.
            ``no_formatting`` / ``print_token_counts`` - passed on to
                ``process_file``.
    """
    cwd = os.getcwd()
    outputs = []

    target_files = config["target_files"]
    allowed_extensions = config["allowed_extensions"]
    ignored_extensions = config["ignored_extensions"]

    for root, dirs, files in os.walk(config['path']):
        # Pruned in place so os.walk does not descend into ignored directories.
        remove_ignored_dirs(dirs, config['ignore_dirs'])

        for file in files:
            # Ignored extensions are honoured even when no allow-list is given.
            if ignored_extensions and file.endswith(ignored_extensions):
                continue
            if allowed_extensions and not file.endswith(allowed_extensions):
                continue
            if file in config['ignore_files']:
                continue
            if target_files and file not in target_files:
                continue

            # Report the path relative to the working directory when the walk
            # root lies inside it; a single leading "/" is removed.
            relative_path = f'{root.replace(cwd, "")}/{file}'
            if relative_path[0] == '/':
                relative_path = relative_path[1:]

            output = process_file(root, file, relative_path, config['no_formatting'], config['print_token_counts'])
            # Unreadable files (None) and empty files are left out.
            if output and output['data']:
                outputs.append(output)

    # Group files of the same type together (stable sort on the extension).
    outputs = sorted(outputs, key=lambda x: os.path.splitext(x['path'])[-1])
    sept = '\n\n---\n\n'
    tree = generate_tree(config['path'], config['ignore_dirs'], indent_size=2) + sept

    def generate_output_content(x):
        """Format one file as a fenced block whose first line is a path comment."""
        return f"```{x['lang']}\n{x['comment_symbol']} {x['path']}\n{x['data']}\n```"

    if config['print_token_counts']:
        out_string = tree + sept.join([f"count: {x['token_count']}\n{generate_output_content(x)}" for x in outputs])
    else:
        out_string = tree + sept.join([generate_output_content(x) for x in outputs])

    total_tokens = count_token(out_string)

    with open(config['output_file'], "w", encoding='utf-8') as outfile:
        outfile.write(f'total: {total_tokens}\n\n{out_string}')

# Settings consumed by walk_files.
config = {
    # Directory to scan; a sub-folder such as "src/components/game" also works.
    'path': os.getcwd(),
    # File the combined result is written to.
    'output_file': "output.txt",
    # Only these file names are included; use [] to include every file.
    'target_files': ["gameSetup.ts", "types.ts"],
    # Suffix filters; an empty tuple disables the respective filter.
    'allowed_extensions': (),
    'ignored_extensions': (),
    'print_token_counts': False,
    # True strips comments and squashes each file onto a single line.
    'no_formatting': True,
    'ignore_dirs': [".erb", "release", "favicon", ".husky", ".next", "node_modules", ".git", ".vscode", "__pycache__", "old", "test_page", "dist"],
    'ignore_files': ["example.ts", "output.txt", "yarn.lock", "package-lock.json", "package.json", "tsconfig.json", "run.ipynb", "tailwind.config.cjs"],
}

walk_files(config)
# Open the file that was actually written, not a hard-coded name, in VS Code.
os.system(f'code "{config["output_file"]}"')

0

In [4]:
import itertools

def generate_strings(letters, numbers, shapes):
    """Build every ``Shape_number_letter`` label from the three inputs.

    Shape names are passed through ``str.capitalize``. Labels are ordered with
    the shape varying slowest and the letter varying fastest.
    """
    capitalized = []
    for raw_shape in shapes:
        capitalized.append(raw_shape.capitalize())

    labels = []
    for shape, number, letter in itertools.product(capitalized, numbers, letters):
        labels.append(f'{shape}_{number}_{letter}')
    return labels

letters = list("abcdefg")
numbers = [2, 3, 4, 6, 8, 9, 10]
shapes = ["circle", "square", "triangle", "diamond"]

all_strings = generate_strings(letters, numbers, shapes)

# Every label is wrapped in double quotes and followed by " | ", including the
# last one, so the printed line ends with a trailing separator.
final_string = "".join(f'"{string}" | ' for string in all_strings)

print(final_string)

"Circle_2_a" | "Circle_2_b" | "Circle_2_c" | "Circle_2_d" | "Circle_2_e" | "Circle_2_f" | "Circle_2_g" | "Circle_3_a" | "Circle_3_b" | "Circle_3_c" | "Circle_3_d" | "Circle_3_e" | "Circle_3_f" | "Circle_3_g" | "Circle_4_a" | "Circle_4_b" | "Circle_4_c" | "Circle_4_d" | "Circle_4_e" | "Circle_4_f" | "Circle_4_g" | "Circle_6_a" | "Circle_6_b" | "Circle_6_c" | "Circle_6_d" | "Circle_6_e" | "Circle_6_f" | "Circle_6_g" | "Circle_8_a" | "Circle_8_b" | "Circle_8_c" | "Circle_8_d" | "Circle_8_e" | "Circle_8_f" | "Circle_8_g" | "Circle_9_a" | "Circle_9_b" | "Circle_9_c" | "Circle_9_d" | "Circle_9_e" | "Circle_9_f" | "Circle_9_g" | "Circle_10_a" | "Circle_10_b" | "Circle_10_c" | "Circle_10_d" | "Circle_10_e" | "Circle_10_f" | "Circle_10_g" | "Square_2_a" | "Square_2_b" | "Square_2_c" | "Square_2_d" | "Square_2_e" | "Square_2_f" | "Square_2_g" | "Square_3_a" | "Square_3_b" | "Square_3_c" | "Square_3_d" | "Square_3_e" | "Square_3_f" | "Square_3_g" | "Square_4_a" | "Square_4_b" | "Square_4_c" | "Sq