diff --git a/.gitignore b/.gitignore index 09f2c49..1ef2f86 100644 --- a/.gitignore +++ b/.gitignore @@ -2,10 +2,12 @@ fuori test_tree test_ignore fuori-test +src/generated_unpacker.h + +# misc +.git/ # macOS .DS_Store -/scripts - _export.md diff --git a/Makefile b/Makefile index 21b5e88..aab4599 100644 --- a/Makefile +++ b/Makefile @@ -5,19 +5,25 @@ CFLAGS = -Wall -Wextra -Wshadow -Wcast-align -Wwrite-strings -Wredundant-decls \ -Wstrict-prototypes -Wold-style-definition -std=c99 -O2 -D_POSIX_C_SOURCE=200809L TARGET = fuori TEST_CLI_TARGET = fuori-test -SOURCES = src/main.c src/collect.c src/render.c src/git_paths.c src/ignore.c src/options.c src/tree.c src/sensitive.c +SOURCES = src/main.c src/collect.c src/render.c src/git_paths.c src/ignore.c src/options.c src/tree.c src/sensitive.c src/unpacker.c TEST_TARGET = test_ignore TREE_TEST_TARGET = test_tree +UNPACKER_SOURCE = scripts/extract_full_export.py.txt +UNPACKER_GENERATOR = scripts/generate_unpacker_header.py +GENERATED_UNPACKER = src/generated_unpacker.h PREFIX ?= /usr/local BINDIR ?= $(PREFIX)/bin VERSION ?= dev all: $(TARGET) -$(TARGET): $(SOURCES) +$(GENERATED_UNPACKER): $(UNPACKER_SOURCE) $(UNPACKER_GENERATOR) + python3 $(UNPACKER_GENERATOR) $(UNPACKER_SOURCE) $(GENERATED_UNPACKER) + +$(TARGET): $(SOURCES) $(GENERATED_UNPACKER) $(CC) $(CPPFLAGS) $(CFLAGS) -o $(TARGET) $(SOURCES) -$(TEST_CLI_TARGET): $(SOURCES) +$(TEST_CLI_TARGET): $(SOURCES) $(GENERATED_UNPACKER) $(CC) $(CPPFLAGS) $(CFLAGS) -DFUORI_TESTING -o $(TEST_CLI_TARGET) $(SOURCES) $(TEST_TARGET): tests/test_ignore.c src/ignore.c src/ignore.h @@ -32,7 +38,7 @@ test: $(TARGET) $(TEST_CLI_TARGET) $(TEST_TARGET) $(TREE_TEST_TARGET) BIN=./$(TEST_CLI_TARGET) sh ./tests/test_cli.sh clean: - rm -f $(TARGET) $(TEST_CLI_TARGET) $(TEST_TARGET) $(TREE_TEST_TARGET) + rm -f $(TARGET) $(TEST_CLI_TARGET) $(TEST_TARGET) $(TREE_TEST_TARGET) $(GENERATED_UNPACKER) install: $(TARGET) install -d $(BINDIR) diff --git a/README.md b/README.md index 32a2839..24ce625 
100644 --- a/README.md +++ b/README.md @@ -98,6 +98,7 @@ fuori [OPTIONS] | `-0`, `--null` | Use NUL as the stdin delimiter (requires `--from-stdin`) | | `--line-numbers` | Prefix exported code lines with line numbers | | `--hunks []` | In Git delta modes, export only changed hunks plus context lines | +| `--unpacker` | Append an LLM-oriented unpacker appendix for full exports | | `--tree` / `--no-tree` | Include/omit project tree (default: on) | | `--tree-depth ` | Limit tree render depth | | `-s ` | Max file size in KB (default: 100) | @@ -111,6 +112,7 @@ fuori [OPTIONS] Git selection flags (`--staged`, `--unstaged`, `--diff`) and `--from-stdin` are mutually exclusive; `--no-git` cannot be combined with them. `--no-default-ignore` only applies to filesystem selection. `--hunks` only applies to `--staged`, `--unstaged`, and `--diff`. +`--unpacker` cannot be combined with `--hunks`. **Examples:** @@ -122,6 +124,7 @@ fuori --diff HEAD~3..HEAD # Files changed in the last 3 commits fuori --diff main...HEAD # Changes since branching from main fuori --staged --hunks # Only changed hunks with default context fuori --diff main...HEAD --hunks=8 # Wider hunk context for review +fuori --unpacker # Append an unpacker appendix for LLM reconstruction fuori -o - > codebase.md # Pipe to stdout fuori --no-tree # Skip the project tree section fuori --tree-depth 2 # Shallow tree @@ -268,7 +271,8 @@ The output markdown file will contain: 5. Either a full-file code block or one or more hunk slices separated by omission markers such as `... 84 unchanged lines omitted ...` 6. Optional line-number prefixes inside code blocks when `--line-numbers` is set; hunk exports keep original file line numbers 7. Appropriate language identifiers for syntax highlighting -8. A `stderr` summary of files, bytes, and estimated tokens after successful completion +8. An optional unpacker appendix with reconstruction instructions and an embedded Python helper when `--unpacker` is set +9. 
#!/usr/bin/env python3
# Reconstruct source files from a full fuori markdown export.
#
# The export is scanned line by line.  Everything ahead of the file entries
# (preamble, "## Change Context", "## Project Tree") is skipped; every later
# "## <path>" heading followed by a fenced code block becomes one output file.
# File bodies are carried through unchanged apart from the optional
# line-number prefixes, which are removed.

from __future__ import annotations

import argparse
import html
import re
import sys
from pathlib import Path


LINE_NUMBER_RE = re.compile(r"^\s*\d+ \| ?(.*)$")
# NOTE(review): both marker literals are empty strings in the text under
# review.  They have to equal the marker lines the exporter writes around the
# file entries -- confirm against the renderer before relying on them.
FILES_BEGIN_MARKER = ""
FILES_END_MARKER = ""

_HEX_DIGITS = frozenset("0123456789abcdefABCDEF")
_SIMPLE_ESCAPES = {
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "\\": "\\",
    "`": "`",
    "*": "*",
    "[": "[",
    "]": "]",
}


class ExportParseError(Exception):
    """Raised when the export cannot be parsed or safely unpacked."""


def _strip_eol(line: str) -> str:
    """Return *line* without its trailing LF or CRLF terminator."""
    if line.endswith("\n"):
        line = line[:-1]
    if line.endswith("\r"):
        line = line[:-1]
    return line


def _split_lines(text: str) -> list[str]:
    """Split *text* after every LF, keeping the terminators.

    ``str.splitlines`` is deliberately avoided: it also breaks on CR, form
    feed, U+2028 and friends, which are legitimate characters inside a file
    body and must not create extra "lines".
    """
    pieces = text.split("\n")
    lines = [piece + "\n" for piece in pieces[:-1]]
    if pieces[-1]:
        lines.append(pieces[-1])
    return lines


def decode_heading_path(text: str) -> str:
    """Decode the path shown in a ``## <path>`` heading.

    HTML entities are unescaped first, then backslash escapes are resolved:
    ``\\n``, ``\\r``, ``\\t``, an escaped backslash, backtick, ``*``, ``[`` or
    ``]``, and ``\\xHH`` with exactly two hexadecimal digits.  Any other
    escape, including a malformed ``\\x`` sequence, is kept verbatim.
    """
    text = html.unescape(text)
    decoded: list[str] = []
    length = len(text)
    pos = 0

    while pos < length:
        char = text[pos]
        if char != "\\":
            decoded.append(char)
            pos += 1
            continue

        if pos + 1 >= length:
            # Lone trailing backslash: keep it.
            decoded.append("\\")
            break

        escape = text[pos + 1]
        if escape in _SIMPLE_ESCAPES:
            decoded.append(_SIMPLE_ESCAPES[escape])
            pos += 2
            continue

        if escape == "x":
            digits = text[pos + 2 : pos + 4]
            # int() alone would also accept "+1", " f" and similar; require
            # two real hex digits so only genuine escapes are decoded.
            if len(digits) == 2 and _HEX_DIGITS.issuperset(digits):
                decoded.append(chr(int(digits, 16)))
                pos += 4
                continue

        decoded.append("\\")
        decoded.append(escape)
        pos += 2

    return "".join(decoded)


def parse_open_fence(line: str) -> int | None:
    """Return the number of leading backticks if *line* opens a fence (>= 3)."""
    run = len(line) - len(line.lstrip("`"))
    return run if run >= 3 else None


def parse_preamble_flags(lines: list[str]) -> tuple[bool, bool]:
    """Scan the preamble (up to the first ``## `` heading) for export flags.

    Returns ``(line_numbers_on, hunks_on)``.
    """
    line_numbers_on = False
    hunks_on = False

    for line in lines:
        heading = _strip_eol(line)
        if heading.startswith("## "):
            break
        if heading == "Line numbers: on":
            line_numbers_on = True
        elif heading.startswith("Hunks: on"):
            hunks_on = True

    return line_numbers_on, hunks_on


def strip_line_number(line: str) -> str:
    """Remove the ``<n> | `` prefix from one exported line.

    The trailing newline, if any, is preserved.

    Raises:
        ExportParseError: if the line does not carry a line-number prefix.
    """
    newline = "\n" if line.endswith("\n") else ""
    content = line[:-1] if newline else line
    match = LINE_NUMBER_RE.fullmatch(content)
    if not match:
        raise ExportParseError(f"invalid numbered line: {content!r}")
    return match.group(1) + newline


def next_nonblank_index(lines: list[str], start: int) -> int | None:
    """Return the index of the first non-blank line at or after *start*."""
    for index in range(start, len(lines)):
        if lines[index].strip() != "":
            return index
    return None


def _parse_export_text(text: str) -> tuple[list[tuple[str, str]], bool]:
    """Parse export *text* into ``([(path, content), ...], line_numbers_on)``."""
    lines = _split_lines(text)
    total = len(lines)
    line_numbers_on, hunks_on = parse_preamble_flags(lines)
    has_files_marker = any(_strip_eol(line) == FILES_BEGIN_MARKER for line in lines)

    if hunks_on:
        raise ExportParseError("hunk exports are not supported; use a full export without --hunks")

    entries: list[tuple[str, str]] = []
    i = 0
    seen_file = False
    saw_files_marker = False

    while i < total:
        heading = _strip_eol(lines[i])

        # When the export carries a begin marker, ignore everything ahead of it.
        if has_files_marker and not saw_files_marker:
            if heading == FILES_BEGIN_MARKER:
                saw_files_marker = True
            i += 1
            continue

        if heading == FILES_BEGIN_MARKER:
            saw_files_marker = True
            i += 1
            continue
        if heading == FILES_END_MARKER:
            break

        if not seen_file and heading == "## Change Context":
            # Skip the whole section up to the next "## " heading.
            i += 1
            while i < total and not lines[i].startswith("## "):
                i += 1
            continue

        if not seen_file and heading == "## Project Tree":
            i += 1
            while i < total and lines[i].strip() == "":
                i += 1
            if i >= total:
                raise ExportParseError("unterminated project tree section")

            fence_len = parse_open_fence(_strip_eol(lines[i]))
            if fence_len is None:
                raise ExportParseError("project tree section is missing its opening fence")
            i += 1

            tree_close = "`" * fence_len
            while i < total and _strip_eol(lines[i]) != tree_close:
                i += 1
            if i >= total:
                raise ExportParseError("unterminated project tree code fence")
            i += 1
            continue

        if not heading.startswith("## "):
            i += 1
            continue

        body_start = next_nonblank_index(lines, i + 1)
        if body_start is None:
            raise ExportParseError(f"missing code fence for section {heading[3:]!r}")

        fence_len = parse_open_fence(_strip_eol(lines[body_start]))
        if fence_len is None:
            if not seen_file and not saw_files_marker:
                raise ExportParseError(f"unexpected non-file section before file entries: {heading!r}")
            raise ExportParseError(f"invalid code fence for {heading[3:]!r}")

        file_path = decode_heading_path(heading[3:])
        closing_fence = "`" * fence_len
        i = body_start + 1

        body_lines: list[str] = []
        while i < total:
            if _strip_eol(lines[i]) == closing_fence:
                i += 1
                break
            body_lines.append(lines[i])
            i += 1
        else:
            raise ExportParseError(f"unterminated code fence for {file_path!r}")

        if line_numbers_on:
            body_lines = [strip_line_number(line) for line in body_lines]

        entries.append((file_path, "".join(body_lines)))
        seen_file = True

    return entries, line_numbers_on


def read_export_entries(export_path: Path) -> tuple[list[tuple[str, str]], bool]:
    """Read a full export and return its file entries.

    Returns:
        ``(entries, line_numbers_on)`` where *entries* is a list of
        ``(path, content)`` pairs in export order.

    Raises:
        ExportParseError: for hunk exports or malformed sections.
        OSError / UnicodeError: if the export cannot be read as UTF-8.
    """
    # newline="" disables newline translation so CR and CRLF inside file
    # bodies survive exactly as exported.
    with open(export_path, encoding="utf-8", newline="") as handle:
        text = handle.read()
    return _parse_export_text(text)


def safe_output_path(output_dir: Path, export_path: str) -> Path:
    """Map an exported path to a destination strictly inside *output_dir*.

    Leading slashes are dropped so absolute paths are re-rooted under the
    output directory.

    Raises:
        ExportParseError: if the path contains NUL, resolves outside the
            output directory, or resolves to the output directory itself.
    """
    if "\x00" in export_path:
        raise ExportParseError(f"invalid path in export: {export_path!r}")

    output_root = output_dir.resolve()
    destination = (output_dir / export_path.lstrip("/")).resolve()

    try:
        relative = destination.relative_to(output_root)
    except ValueError as exc:
        raise ExportParseError(f"refusing to write outside output directory: {export_path!r}") from exc

    if not relative.parts:
        # "", "." or "a/.." would otherwise try to overwrite the directory.
        raise ExportParseError(f"export path does not name a file: {export_path!r}")

    return destination


def write_entries(output_dir: Path, entries: list[tuple[str, str]]) -> None:
    """Write every ``(path, content)`` entry below *output_dir*."""
    for export_path, content in entries:
        destination = safe_output_path(output_dir, export_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        # newline="" writes the content verbatim on every platform.
        with open(destination, "w", encoding="utf-8", newline="") as handle:
            handle.write(content)


def main(argv: list[str] | None = None) -> int:
    """Command-line entry point; returns the process exit status.

    *argv* defaults to ``sys.argv[1:]`` when omitted.
    """
    parser = argparse.ArgumentParser(
        description="Extract source files from a full fuori markdown export."
    )
    parser.add_argument("export", type=Path, help="Path to the markdown export")
    parser.add_argument("output_dir", type=Path, help="Directory to reconstruct files into")
    args = parser.parse_args(argv)

    try:
        entries, _ = read_export_entries(args.export)
        args.output_dir.mkdir(parents=True, exist_ok=True)
        write_entries(args.output_dir, entries)
    except (OSError, UnicodeError, ExportParseError) as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
#!/usr/bin/env python3
# Generate a C header that embeds a file as a NUL-terminated byte array.
#
# The header defines FUORI_UNPACKER_SCRIPT (the bytes plus a trailing 0x00)
# and FUORI_UNPACKER_SCRIPT_LEN (the byte count without the terminator).

from __future__ import annotations

import sys
from pathlib import Path


BYTES_PER_ROW = 12


def render_header(data: bytes) -> str:
    """Return the text of the generated header for *data*."""
    lines = [
        "#ifndef GENERATED_UNPACKER_H",
        "#define GENERATED_UNPACKER_H",
        "",
        # size_t is used below, so the header must pull in its definition.
        "#include <stddef.h>",
        "",
        "static const unsigned char FUORI_UNPACKER_SCRIPT[] = {",
    ]

    for offset in range(0, len(data), BYTES_PER_ROW):
        row = ", ".join(f"0x{byte:02X}" for byte in data[offset : offset + BYTES_PER_ROW])
        lines.append(f" {row},")

    lines.extend(
        [
            " 0x00",
            "};",
            "",
            f"static const size_t FUORI_UNPACKER_SCRIPT_LEN = {len(data)};",
            "",
            "#endif",
        ]
    )
    return "\n".join(lines) + "\n"


def main(argv: list[str] | None = None) -> int:
    """Write the header for ``<source>`` to ``<output>``.

    *argv* holds the two positional arguments and defaults to
    ``sys.argv[1:]``.  Returns 0 on success and 1 on a usage error, an I/O
    error, or when the source contains a NUL byte.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2:
        print("usage: generate_unpacker_header.py <source> <output>", file=sys.stderr)
        return 1

    source = Path(args[0])
    output = Path(args[1])

    try:
        data = source.read_bytes()
        if b"\x00" in data:
            # The array is handed out as a NUL-terminated C string, so an
            # embedded NUL would silently cut the script short.
            print(f"error: {source} contains a NUL byte", file=sys.stderr)
            return 1
        output.write_text(render_header(data), encoding="utf-8")
    except OSError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
a/src/collect.c +++ b/src/collect.c @@ -165,7 +165,10 @@ static int is_binary_file(const unsigned char* buffer, size_t bytes_read) { ctrl++; } } - return (ctrl * 100 / bytes_read) > 2; + size_t whole_hundreds = bytes_read / 100; + size_t remainder = bytes_read % 100; + size_t threshold = whole_hundreds * 3 + ((remainder * 3 + 99) / 100); + return ctrl >= threshold; } static const char* classify_shebang_interpreter(const char* name) { @@ -242,8 +245,8 @@ static const char* detect_shebang(const unsigned char* buffer, size_t buffer_len interpreter = first_base; } - char* interp_base = strrchr(interpreter, '/'); - interp_base = interp_base ? interp_base + 1 : (char*)interpreter; + const char* interp_base = strrchr(interpreter, '/'); + interp_base = interp_base ? interp_base + 1 : interpreter; return classify_shebang_interpreter(interp_base); } @@ -357,9 +360,10 @@ static int read_file_buffer(const char* filepath, if (buffer_size > 0) { bytes_read = fread(buffer, 1, buffer_size, file); if (bytes_read < buffer_size) { + int read_failed = ferror(file); free(buffer); fclose(file); - if (ferror(file)) { + if (read_failed) { perror("Error reading file"); return READ_FILE_ERROR; } @@ -594,6 +598,7 @@ static int collect_recursive_paths(const char* base_path, if (!dir) { if (strcmp(base_path, ".") != 0 && (errno == EACCES || errno == EPERM)) { + ctx->skipped_unreadable_dirs++; fprintf(stderr, "Warning: Failed to process directory %s\n", base_path); return 0; } diff --git a/src/git_paths.c b/src/git_paths.c index a05241a..b875fc2 100644 --- a/src/git_paths.c +++ b/src/git_paths.c @@ -15,6 +15,10 @@ typedef enum { GIT_PROBE_FALLBACK } GitProbeResult; +#ifndef GIT_CAPTURE_MAX_BYTES +#define GIT_CAPTURE_MAX_BYTES (64U * 1024U * 1024U) +#endif + #define GIT_SELECTION_ARGS_MAX 13 #define GIT_HUNK_ARGS_MAX 13 @@ -53,6 +57,23 @@ static int compare_selected_paths(const void* lhs, const void* rhs) { right->previous_repo_rel_path ? 
/*
 * Add FD_CLOEXEC to the descriptor flags of fd, keeping any flags that are
 * already set.
 *
 * Returns 0 on success.  Returns -1 with errno = EINVAL for a negative fd,
 * or -1 with errno left as set by fcntl() when either call fails.
 */
static int set_fd_cloexec(int fd) {
    int current;

    if (fd < 0) {
        errno = EINVAL;
        return -1;
    }

    current = fcntl(fd, F_GETFD);
    if (current == -1) {
        return -1;
    }

    /* Read-modify-write so existing descriptor flags are not cleared. */
    return (fcntl(fd, F_SETFD, current | FD_CLOEXEC) == -1) ? -1 : 0;
}
char sensitive_buf[32]; + char unreadable_dirs_buf[32]; if (!ctx || !ctx->verbose) { return; @@ -96,24 +98,44 @@ static void print_verbose_skip_summary(const AppContext* ctx) { format_size_with_commas(ctx->skipped_too_large, large_buf, sizeof(large_buf)) != 0 || format_size_with_commas(ctx->skipped_ignored, ignored_buf, sizeof(ignored_buf)) != 0 || format_size_with_commas(ctx->skipped_symlink, symlink_buf, sizeof(symlink_buf)) != 0 || - format_size_with_commas(ctx->skipped_sensitive, sensitive_buf, sizeof(sensitive_buf)) != 0) { + format_size_with_commas(ctx->skipped_sensitive, sensitive_buf, sizeof(sensitive_buf)) != 0 || + format_size_with_commas(ctx->skipped_unreadable_dirs, unreadable_dirs_buf, sizeof(unreadable_dirs_buf)) != 0) { fprintf(stderr, - "Skipped: binary/empty=%zu, too_large=%zu, ignored=%zu, symlink=%zu, sensitive=%zu\n", + "Skipped: binary/empty=%zu, too_large=%zu, ignored=%zu, symlink=%zu, sensitive=%zu, unreadable_dirs=%zu\n", ctx->skipped_binary, ctx->skipped_too_large, ctx->skipped_ignored, ctx->skipped_symlink, - ctx->skipped_sensitive); + ctx->skipped_sensitive, + ctx->skipped_unreadable_dirs); return; } fprintf(stderr, - "Skipped: binary/empty=%s, too_large=%s, ignored=%s, symlink=%s, sensitive=%s\n", + "Skipped: binary/empty=%s, too_large=%s, ignored=%s, symlink=%s, sensitive=%s, unreadable_dirs=%s\n", binary_buf, large_buf, ignored_buf, symlink_buf, - sensitive_buf); + sensitive_buf, + unreadable_dirs_buf); +} + +static void print_unreadable_directory_warning(const AppContext* ctx) { + char count_buf[32]; + + if (!ctx || ctx->skipped_unreadable_dirs == 0) { + return; + } + if (format_size_with_commas(ctx->skipped_unreadable_dirs, count_buf, sizeof(count_buf)) != 0) { + fprintf(stderr, + "Warning: skipped %zu unreadable directorie(s); export may be incomplete.\n", + ctx->skipped_unreadable_dirs); + return; + } + fprintf(stderr, + "Warning: skipped %s unreadable directorie(s); export may be incomplete.\n", + count_buf); } static int 
make_temp_output_template(const char* output_path, char* tmpl, size_t tmpl_size) { @@ -174,6 +196,75 @@ static int format_generated_timestamp(char* buffer, size_t buffer_size) { return 0; } +static int fsync_stream_file(FILE* file) { + int fd; + + if (!file) { + errno = EINVAL; + return -1; + } + fd = fileno(file); + if (fd == -1) { + return -1; + } + return fsync(fd); +} + +static int fsync_parent_directory(const char* path) { + char path_copy[MAX_PATH_LENGTH]; + int dir_fd = -1; + int open_flags = O_RDONLY; + char* dir; + int saved_errno = 0; + + if (!path || path[0] == '\0') { + errno = EINVAL; + return -1; + } + if (strlen(path) >= sizeof(path_copy)) { + errno = ENAMETOOLONG; + return -1; + } + memcpy(path_copy, path, strlen(path) + 1); + dir = dirname(path_copy); + if (!dir || dir[0] == '\0') { + errno = EINVAL; + return -1; + } + +#ifdef O_CLOEXEC + open_flags |= O_CLOEXEC; +#endif +#ifdef O_DIRECTORY + open_flags |= O_DIRECTORY; +#endif + + dir_fd = open(dir, open_flags); +#ifdef O_DIRECTORY + if (dir_fd == -1 && errno == EINVAL) { + dir_fd = open(dir, O_RDONLY); + } +#endif + if (dir_fd == -1) { + return -1; + } + + if (fsync(dir_fd) == -1) { + saved_errno = errno; + close(dir_fd); + if (saved_errno == EINVAL || saved_errno == ENOTSUP || saved_errno == EBADF) { + return 0; + } + errno = saved_errno; + return -1; + } + if (close(dir_fd) == -1) { + return -1; + } + + return 0; +} + static const char* normalize_display_path(const char* path) { if (path && strncmp(path, "./", 2) == 0) { return path + 2; @@ -345,6 +436,7 @@ int main(int argc, char* argv[]) { render_ctx.diff_range = options.diff_range; render_ctx.show_line_numbers = options.show_line_numbers; render_ctx.show_hunks = options.show_hunks; + render_ctx.show_unpacker = options.show_unpacker; render_ctx.show_tree = ctx.show_tree; render_ctx.hunk_context_lines = options.hunk_context_lines; render_ctx.tree_depth = ctx.tree_depth; @@ -461,6 +553,10 @@ int main(int argc, char* argv[]) { goto cleanup; } if 
/*
 * Report on stderr that --unpacker and --hunks were combined, followed by
 * the pointer to the help text.
 */
static void print_unpacker_hunks_conflict(void) {
    fputs("--unpacker cannot be used with --hunks because unpacking requires complete file contents\n"
          "Use -h or --help for usage information\n",
          stderr);
}
even if they match sensitive-file protection rules\n"); } +/* + * CLI invariants: + * 1. Parse flags and validate conflicts against the requested mode. + * 2. Resolve auto mode into a concrete selection mode and collect paths when needed. + * 3. Validate any mode-specific constraints that only make sense after resolution. + */ int parse_cli_options(int argc, char* argv[], CliOptions* options) { int force_no_git = 0; @@ -169,6 +181,8 @@ int parse_cli_options(int argc, char* argv[], CliOptions* options) { options->show_tree = 0; } else if (strcmp(argv[i], "--line-numbers") == 0) { options->show_line_numbers = 1; + } else if (strcmp(argv[i], "--unpacker") == 0) { + options->show_unpacker = 1; } else if (strcmp(argv[i], "--hunks") == 0) { options->show_hunks = 1; options->hunk_context_lines = 3; @@ -310,6 +324,10 @@ int parse_cli_options(int argc, char* argv[], CliOptions* options) { print_hunks_conflict(); return -1; } + if (options->show_hunks && options->show_unpacker) { + print_unpacker_hunks_conflict(); + return -1; + } if (force_no_git) { options->requested_mode = FILE_SELECTION_RECURSIVE; diff --git a/src/options.h b/src/options.h index 614b204..c85b038 100644 --- a/src/options.h +++ b/src/options.h @@ -14,6 +14,7 @@ typedef struct { int show_tree; int show_line_numbers; int show_hunks; + int show_unpacker; int no_default_ignore; int allow_sensitive; size_t max_file_size; diff --git a/src/render.c b/src/render.c index 88b16dd..9c756a9 100644 --- a/src/render.c +++ b/src/render.c @@ -7,6 +7,7 @@ #include "text_io.h" #include "tree.h" +#include "unpacker.h" #ifdef FUORI_TESTING static int maybe_inject_render_failure(size_t index) { @@ -59,6 +60,16 @@ typedef struct { static int count_fence_bytes(size_t* total, size_t count, const char* lang); static int write_fence(FILE* out, size_t count, const char* lang); +static const char* FILES_BEGIN_MARKER = "\n\n"; +static const char* FILES_END_MARKER = "\n\n"; +static const char* UNPACKER_BEGIN_MARKER = "\n\n"; +static 
const char* UNPACKER_END_MARKER = "\n"; +static const char* UNPACKER_EXPLANATION = + "This export contains complete file bodies and an unpacker helper for frontier LLM workflows. " + "To reconstruct the tree locally, write the Python script below to a file such as " + "`extract_full_export.py` and run " + "`python3 extract_full_export.py `.\n\n"; + static const char* export_description(FileSelectionMode mode) { switch (mode) { case FILE_SELECTION_GIT_WORKTREE: @@ -331,6 +342,10 @@ static int emit_export_header(RenderSink* sink, const ExportRenderContext* ctx) return -1; } } + if (ctx->show_unpacker && + sink_write_text(sink, "\nUnpacker: included") != 0) { + return -1; + } if (sink_write_text(sink, "\n\n") != 0 || sink_write_text(sink, export_description(ctx->mode)) != 0) { @@ -395,6 +410,65 @@ static int emit_change_context(RenderSink* sink, const ExportRenderContext* ctx) return sink_write_text(sink, "\n"); } +static int emit_file_entries_marker(RenderSink* sink, size_t visible_count) { + if (!sink) { + errno = EINVAL; + return -1; + } + if (visible_count == 0) { + return 0; + } + return sink_write_text(sink, FILES_BEGIN_MARKER); +} + +static int emit_file_entries_end_marker(RenderSink* sink, size_t visible_count) { + if (!sink) { + errno = EINVAL; + return -1; + } + if (visible_count == 0) { + return 0; + } + return sink_write_text(sink, FILES_END_MARKER); +} + +static int emit_unpacker_appendix(RenderSink* sink, const ExportRenderContext* ctx) { + const char* script; + + if (!sink || !ctx) { + errno = EINVAL; + return -1; + } + if (!ctx->show_unpacker) { + return 0; + } + + script = fuori_unpacker_script(); + if (!script) { + errno = EINVAL; + return -1; + } + + if (sink_write_text(sink, UNPACKER_BEGIN_MARKER) != 0 || + sink_write_text(sink, UNPACKER_EXPLANATION) != 0 || + sink_write_fence(sink, 3, "python") != 0 || + sink_write_text(sink, script) != 0) { + return -1; + } + if (fuori_unpacker_script_length() == 0 || + script[fuori_unpacker_script_length() - 1] 
!= '\n') { + if (sink_write_char(sink, '\n') != 0) { + return -1; + } + } + if (sink_write_fence(sink, 3, NULL) != 0 || + sink_write_text(sink, "\n") != 0 || + sink_write_text(sink, UNPACKER_END_MARKER) != 0) { + return -1; + } + return 0; +} + int write_export_header(FILE* out, const ExportRenderContext* ctx) { RenderSink sink = {.out = out, .total = NULL}; return emit_export_header(&sink, ctx); @@ -965,6 +1039,9 @@ int calculate_export_metrics(const ExportPlan* plan, count_project_tree_bytes_filtered(plan, info->include_mask, ctx->tree_depth, &total) != 0) { return -1; } + if (emit_file_entries_marker(&sink, info->visible_count) != 0) { + return -1; + } for (size_t i = 0; i < plan->count; i++) { if (!info->include_mask[i]) { @@ -974,6 +1051,10 @@ int calculate_export_metrics(const ExportPlan* plan, return -1; } } + if (emit_file_entries_end_marker(&sink, info->visible_count) != 0 || + emit_unpacker_appendix(&sink, ctx) != 0) { + return -1; + } metrics->files_exported = info->visible_count; metrics->bytes_written = total; @@ -993,6 +1074,10 @@ int render_export_plan(FILE* out, return -1; } + if (emit_file_entries_marker(&sink, info->visible_count) != 0) { + return -1; + } + for (size_t i = 0; i < plan->count; i++) { if (!info->include_mask[i]) { continue; @@ -1009,5 +1094,9 @@ int render_export_plan(FILE* out, return -1; } } + if (emit_file_entries_end_marker(&sink, info->visible_count) != 0 || + emit_unpacker_appendix(&sink, ctx) != 0) { + return -1; + } return 0; } diff --git a/src/render.h b/src/render.h index 6580afc..131458d 100644 --- a/src/render.h +++ b/src/render.h @@ -48,6 +48,7 @@ typedef struct { const char* diff_range; int show_line_numbers; int show_hunks; + int show_unpacker; int show_tree; size_t hunk_context_lines; size_t tree_depth; diff --git a/src/unpacker.c b/src/unpacker.c new file mode 100644 index 0000000..93c154c --- /dev/null +++ b/src/unpacker.c @@ -0,0 +1,11 @@ +#include "unpacker.h" + +#include "generated_unpacker.h" + +const char* 
fuori_unpacker_script(void) { + return (const char*)FUORI_UNPACKER_SCRIPT; +} + +size_t fuori_unpacker_script_length(void) { + return FUORI_UNPACKER_SCRIPT_LEN; +} diff --git a/src/unpacker.h b/src/unpacker.h new file mode 100644 index 0000000..c885a5f --- /dev/null +++ b/src/unpacker.h @@ -0,0 +1,9 @@ +#ifndef UNPACKER_H +#define UNPACKER_H + +#include + +const char* fuori_unpacker_script(void); +size_t fuori_unpacker_script_length(void); + +#endif diff --git a/tests/test_cli.sh b/tests/test_cli.sh index 0634429..29fca7a 100644 --- a/tests/test_cli.sh +++ b/tests/test_cli.sh @@ -69,6 +69,7 @@ assert_contains "$TMPDIR/help_stdout.txt" "--allow-sensitive" assert_contains "$TMPDIR/help_stdout.txt" "--hunks" assert_contains "$TMPDIR/help_stdout.txt" "--line-numbers" assert_contains "$TMPDIR/help_stdout.txt" "--no-default-ignore" +assert_contains "$TMPDIR/help_stdout.txt" "--unpacker" assert_file_equals "$TMPDIR/help_stderr.txt" "" OUTSIDE="$TMPDIR/outside" @@ -102,6 +103,17 @@ assert_contains "$OUTSIDE/redirected.md" "## main.c" assert_contains "$OUTSIDE/redirected.md" "## notes.md" assert_not_contains "$OUTSIDE/redirected.md" "redirected.md" +(cd "$OUTSIDE" && "$BIN" --unpacker -o - >unpacker_stdout.txt 2>unpacker_stderr.txt) +assert_contains "$OUTSIDE/unpacker_stdout.txt" "Unpacker: included" +assert_contains "$OUTSIDE/unpacker_stdout.txt" "" +assert_contains "$OUTSIDE/unpacker_stdout.txt" "" +assert_contains "$OUTSIDE/unpacker_stdout.txt" "" +assert_contains "$OUTSIDE/unpacker_stdout.txt" "This export contains complete file bodies and an unpacker helper" +assert_contains "$OUTSIDE/unpacker_stdout.txt" '```python' +assert_contains "$OUTSIDE/unpacker_stdout.txt" '#!/usr/bin/env python3' +assert_contains "$OUTSIDE/unpacker_stdout.txt" "extract_full_export.py " +assert_contains "$OUTSIDE/unpacker_stdout.txt" "" + LINE_NUMBERS_DIR="$TMPDIR/line_numbers" mkdir -p "$LINE_NUMBERS_DIR" cat >"$LINE_NUMBERS_DIR/main.c" <<'EOF_LINE_NUMBERS' @@ -342,6 +354,7 @@ if [ "$(id -u)" 
-ne 0 ]; then assert_contains "$UNREADABLE_DIR/unreadable_dir_stdout.txt" "## main.c" assert_not_contains "$UNREADABLE_DIR/unreadable_dir_stdout.txt" "hidden.c" assert_contains "$UNREADABLE_DIR/unreadable_dir_stderr.txt" "Warning: Failed to process directory ./blocked" + assert_contains "$UNREADABLE_DIR/unreadable_dir_stderr.txt" "Warning: skipped 1 unreadable directorie(s); export may be incomplete." fi ODD_DIR="$TMPDIR/odd_paths" @@ -581,6 +594,11 @@ if (cd "$REPO" && "$BIN" --hunks >/dev/null 2>stderr_hunks_default_invalid.txt); fi assert_contains "$REPO/stderr_hunks_default_invalid.txt" "--hunks can only be used with --staged, --unstaged, or --diff" +if (cd "$REPO" && "$BIN" --staged --hunks --unpacker >/dev/null 2>stderr_unpacker_hunks_invalid.txt); then + fail "expected --staged --hunks --unpacker to fail" +fi +assert_contains "$REPO/stderr_unpacker_hunks_invalid.txt" "--unpacker cannot be used with --hunks because unpacking requires complete file contents" + if (cd "$REPO" && "$BIN" --no-git --hunks >/dev/null 2>stderr_hunks_no_git_invalid.txt); then fail "expected --no-git --hunks to fail" fi