From 004f541af83648d0d486cb87beff8ed8bed66c17 Mon Sep 17 00:00:00 2001 From: hyle <7681+hyle@users.noreply.github.com> Date: Mon, 16 Mar 2026 18:15:16 +0100 Subject: [PATCH] add export metadata header to markdown output --- README.md | 51 ++++++++++++++++++++--------------- src/git_paths.c | 48 +++++++++++++++++++++++++++++++++ src/git_paths.h | 1 + src/main.c | 51 +++++++++++++++++++++++++++++++++-- src/render.c | 69 ++++++++++++++++++++++++++++++++++++++++++++--- src/render.h | 7 ++++- tests/test_cli.sh | 5 ++++ 7 files changed, 204 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index c2afd35..d3b8a13 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # fuoricode -A command-line tool that exports codebases into single Markdown artifacts optimized for LLM context and code review workflows. +A command-line tool that exports codebases into single Markdown artifacts optimized for LLM context and code review workflows. + ```bash fuori --staged -o review.md ``` @@ -74,7 +75,7 @@ Run `fuori` in any directory: fuori ``` -By default, it writes `_export.md` to the current directory. Inside a Git repo, it uses Git's view of the working tree (tracked + untracked non-ignored files). Outside a repo, or with `--no-git`, it falls back to the recursive filesystem walking. +By default it writes `_export.md` to the current directory. Inside a Git repo, it uses Git's view of the working tree (tracked + untracked non-ignored files). Outside a repo, or with `--no-git`, it falls back to the recursive filesystem walking. ### Options @@ -105,6 +106,7 @@ fuori [OPTIONS] Git selection flags (`--staged`, `--unstaged`, `--diff`) and `--from-stdin` are mutually exclusive; `--no-git` cannot be combined with them. 
**Examples:** + ```bash fuori # Export current working tree fuori --staged -o review.md # Staged changes to a named file @@ -119,29 +121,30 @@ fuori --max-tokens 180000 # Hard token budget fuori -o out.md --no-clobber # Refuse to overwrite ``` -## .gitignore File +## Ignore Rules -You can create a `.gitignore` file in the directory to specify files and patterns to exclude from the export. -These rules apply to the recursive filesystem walker, including `--no-git` mode and automatic fallback outside Git repositories. -This tool supports common gitignore-style rules, including comments, `!` negation, trailing `/` for directories, -root-anchored `/` patterns, and recursive `**` path globs such as `**/node_modules/` and `**/*.pyc`. -In filesystem mode, `fuori` also seeds a small built-in default ignore list even when no `.gitignore` is present: -`.git/`, `node_modules/`, `build/`, `dist/`, `bin/`, `.venv/`, `__pycache__/`, `.env`, `.DS_Store`, -and common compiled/log artifacts such as `*.o`, `*.a`, `*.so`, `*.exe`, `*.dll`, and `*.log`. -If you need those paths exported, use stdin selection or move the files outside those default patterns. +Place a `.gitignore` file in the working directory to exclude files and patterns from the export. +These rules apply in `--no-git` mode and during automatic fallback outside Git repositories. 
-``` -# Ignore build directories -build/ -dist/ +Supported syntax: -# Ignore specific file types -*.log -*.tmp +- Comments (`#`) +- Negation (`!pattern`) +- Directory trailing slash (`dir/`) +- Root-anchored patterns (`/pattern`) +- Recursive globs (`**/node_modules/`, `**/*.pyc`) -# Ignore node_modules directory -node_modules/ -``` +In filesystem mode, `fuori` also applies a built-in default ignore list even when no `.gitignore` is present: + +| Category | Patterns | +|---|---| +| VCS | `.git/` | +| Dependencies | `node_modules/`, `.venv/`, `__pycache__/` | +| Build output | `build/`, `dist/`, `bin/` | +| Compiled artifacts | `*.o`, `*.a`, `*.so`, `*.exe`, `*.dll` | +| Environment / OS | `.env`, `.DS_Store`, `*.log` | + +To export paths that match the default list, use `--from-stdin`. ## File Size Limit @@ -224,7 +227,7 @@ UTF-16 and other non-UTF-8 text encodings are currently treated as non-exportabl The output markdown file will contain: -1. A preamble describing the export mode +1. A preamble with repository, mode, and generation timestamp metadata plus a short mode description 2. A project tree section that reflects the exported artifact (enabled by default) 3. A header with the file path 4. A code block with the file content @@ -235,6 +238,10 @@ Example file contents excerpt (the `Makefile` section is omitted for brevity): ````markdown # Codebase Export +Repository: my-project +Mode: recursive +Generated: 2026-03-16T12:34:56Z + This document contains all the source code files from the current directory subtree. 
## Project Tree diff --git a/src/git_paths.c b/src/git_paths.c index cdbd53d..d3e5d07 100644 --- a/src/git_paths.c +++ b/src/git_paths.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -572,3 +573,50 @@ int collect_git_paths(FileSelectionMode mode, free_selected_paths(paths, parsed_count); return status; } + +int resolve_repository_name(FileSelectionMode mode, char* buffer, size_t buffer_size) { + char cwd[MAX_PATH_LENGTH]; + char path_copy[MAX_PATH_LENGTH]; + char* repo_root = NULL; + const char* source = NULL; + + if (!buffer || buffer_size == 0) { + errno = EINVAL; + return -1; + } + + (void)mode; + + if (capture_git_line(".", "--show-toplevel", 1, &repo_root, NULL) == 0) { + source = repo_root; + } else { + if (!getcwd(cwd, sizeof(cwd))) { + free(repo_root); + return -1; + } + source = cwd; + } + + size_t path_len = strlen(source); + if (path_len >= sizeof(path_copy)) { + free(repo_root); + errno = ENAMETOOLONG; + return -1; + } + memcpy(path_copy, source, path_len + 1); + + char* base = basename(path_copy); + if (!base || base[0] == '\0') { + free(repo_root); + errno = EINVAL; + return -1; + } + if (snprintf(buffer, buffer_size, "%s", base) < 0 || strlen(base) >= buffer_size) { + free(repo_root); + errno = ENAMETOOLONG; + return -1; + } + + free(repo_root); + return 0; +} diff --git a/src/git_paths.h b/src/git_paths.h index bdc996a..530eefb 100644 --- a/src/git_paths.h +++ b/src/git_paths.h @@ -24,6 +24,7 @@ int collect_git_paths(FileSelectionMode mode, int collect_stdin_paths(int null_delim, SelectedPath** paths_out, size_t* count_out); +int resolve_repository_name(FileSelectionMode mode, char* buffer, size_t buffer_size); void free_selected_paths(SelectedPath* paths, size_t count); #endif diff --git a/src/main.c b/src/main.c index 5f16d2f..da7e958 100644 --- a/src/main.c +++ b/src/main.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "app.h" @@ -145,6 +146,30 @@ static int make_temp_output_template(const 
/*
 * Write the current UTC time into buffer as "YYYY-MM-DDTHH:MM:SSZ".
 *
 * buffer      Destination for the NUL-terminated timestamp.
 * buffer_size Capacity of buffer in bytes; the formatted text needs 21
 *             bytes including the terminator.
 *
 * Returns 0 on success. Returns -1 on failure: errno is EINVAL for a NULL
 * buffer or zero size, ENAMETOOLONG when the timestamp does not fit, and is
 * otherwise left as set by time() or gmtime_r().
 */
static int format_generated_timestamp(char* buffer, size_t buffer_size) {
    struct tm broken_down;
    time_t seconds;

    if (buffer == NULL || buffer_size == 0) {
        errno = EINVAL;
        return -1;
    }

    seconds = time(NULL);
    if (seconds == (time_t)-1 || gmtime_r(&seconds, &broken_down) == NULL) {
        return -1;
    }

    /* strftime() returns 0 when the result, terminator included, does not fit. */
    if (strftime(buffer, buffer_size, "%Y-%m-%dT%H:%M:%SZ", &broken_down) != 0) {
        return 0;
    }

    errno = ENAMETOOLONG;
    return -1;
}
a/src/render.c +++ b/src/render.c @@ -65,6 +65,26 @@ static const char* export_description(FileSelectionMode mode) { } } +static const char* export_mode_label(FileSelectionMode mode) { + switch (mode) { + case FILE_SELECTION_GIT_WORKTREE: + return "worktree"; + case FILE_SELECTION_GIT_STAGED: + return "staged"; + case FILE_SELECTION_GIT_UNSTAGED: + return "unstaged"; + case FILE_SELECTION_GIT_DIFF: + return "diff"; + case FILE_SELECTION_STDIN: + return "stdin"; + case FILE_SELECTION_RECURSIVE: + return "recursive"; + case FILE_SELECTION_AUTO: + default: + return "auto"; + } +} + static size_t estimate_tokens(size_t byte_count) { return (byte_count / 7) * 2 + ((byte_count % 7) * 2) / 7; } @@ -160,11 +180,53 @@ static int write_fence(FILE* out, size_t count, const char* lang) { return (fputc('\n', out) == EOF) ? -1 : 0; } -int write_export_header(FILE* out, FileSelectionMode mode) { +static int count_export_header_bytes(size_t* total, + FileSelectionMode mode, + const char* repository, + const char* generated_at) { + if (!total || !repository || !generated_at) { + errno = EINVAL; + return -1; + } + + if (fuori_count_text_bytes(total, "# Codebase Export\n\n") != 0 || + fuori_count_text_bytes(total, "Repository: ") != 0 || + fuori_count_text_bytes(total, repository) != 0 || + fuori_count_text_bytes(total, "\nMode: ") != 0 || + fuori_count_text_bytes(total, export_mode_label(mode)) != 0 || + fuori_count_text_bytes(total, "\nGenerated: ") != 0 || + fuori_count_text_bytes(total, generated_at) != 0 || + fuori_count_text_bytes(total, "\n\n") != 0 || + fuori_count_text_bytes(total, export_description(mode)) != 0) { + return -1; + } + + return 0; +} + +int write_export_header(FILE* out, + FileSelectionMode mode, + const char* repository, + const char* generated_at) { + if (!out || !repository || !generated_at) { + errno = EINVAL; + return -1; + } + if (fuori_write_text(out, "# Codebase Export\n\n") != 0) { return -1; } + if (fuori_write_text(out, "Repository: ") != 0 || + 
fuori_write_text(out, repository) != 0 || + fuori_write_text(out, "\nMode: ") != 0 || + fuori_write_text(out, export_mode_label(mode)) != 0 || + fuori_write_text(out, "\nGenerated: ") != 0 || + fuori_write_text(out, generated_at) != 0 || + fuori_write_text(out, "\n\n") != 0) { + return -1; + } + return fuori_write_text(out, export_description(mode)); } @@ -254,6 +316,8 @@ static int count_entry_bytes(const ExportEntry* entry, size_t fence, size_t* tot int calculate_export_metrics(const ExportPlan* plan, const RenderPlanInfo* info, FileSelectionMode mode, + const char* repository, + const char* generated_at, int show_tree, size_t tree_depth, ExportMetrics* metrics) { @@ -264,8 +328,7 @@ int calculate_export_metrics(const ExportPlan* plan, return -1; } - if (fuori_count_text_bytes(&total, "# Codebase Export\n\n") != 0 || - fuori_count_text_bytes(&total, export_description(mode)) != 0) { + if (count_export_header_bytes(&total, mode, repository, generated_at) != 0) { return -1; } diff --git a/src/render.h b/src/render.h index 2d245e9..732a08d 100644 --- a/src/render.h +++ b/src/render.h @@ -22,10 +22,15 @@ void free_render_plan_info(RenderPlanInfo* info); int calculate_export_metrics(const ExportPlan* plan, const RenderPlanInfo* info, FileSelectionMode mode, + const char* repository, + const char* generated_at, int show_tree, size_t tree_depth, ExportMetrics* metrics); -int write_export_header(FILE* out, FileSelectionMode mode); +int write_export_header(FILE* out, + FileSelectionMode mode, + const char* repository, + const char* generated_at); int render_export_plan(FILE* out, const ExportPlan* plan, const RenderPlanInfo* info, int verbose); #endif diff --git a/tests/test_cli.sh b/tests/test_cli.sh index a1c115c..d49fbfe 100644 --- a/tests/test_cli.sh +++ b/tests/test_cli.sh @@ -71,6 +71,9 @@ int main(void) { return 0; } EOF_OUTSIDE (cd "$OUTSIDE" && "$BIN" -o - >stdout.txt 2>stderr.txt) +assert_contains "$OUTSIDE/stdout.txt" "Repository: outside" +assert_contains 
"$OUTSIDE/stdout.txt" "Mode: recursive" +assert_contains "$OUTSIDE/stdout.txt" "Generated: " assert_contains "$OUTSIDE/stdout.txt" "This document contains all the source code files from the current directory subtree using the local filesystem walker." assert_not_contains "$OUTSIDE/stderr.txt" "Git file-selection modes require" assert_not_contains "$OUTSIDE/stderr.txt" "git rev-parse failed" @@ -356,6 +359,8 @@ ignore me EOF_IGNORED (cd "$REPO/sub" && "$BIN" -o - >stdout.txt 2>stderr.txt) +assert_contains "$REPO/sub/stdout.txt" "Repository: repo" +assert_contains "$REPO/sub/stdout.txt" "Mode: worktree" assert_contains "$REPO/sub/stdout.txt" "This document contains tracked files plus untracked, non-ignored files from the current Git subtree." assert_contains "$REPO/sub/stdout.txt" "├── tracked.c" assert_contains "$REPO/sub/stdout.txt" "└── untracked.py"