Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 44 additions & 25 deletions cmd/entire/cli/checkpoint/fetching_tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,56 +51,57 @@ func NewFetchingTree(ctx context.Context, tree *object.Tree, s storer.EncodedObj
}
}

// File returns the file at the given path. If the blob is not available
// locally (e.g. after a treeless fetch), it is fetched on demand. If go-git's
// storer still can't see the blob after fetching (due to cached packfile index),
// the blob is read via "git cat-file" and an in-memory File is returned.
// File returns the file at the given path. Resolution order:
// 1. go-git's storer (fast path, in-memory).
// 2. `git cat-file -p` against the on-disk object store (handles
// partial-clone-filtered blobs that go-git can't see, plus packfiles
// created by external git commands after this process opened the repo).
// 3. Remote fetch via the configured fetcher, then cat-file again.
//
// Trying cat-file BEFORE the remote fetch is critical: in partial-clone
// repos, blobs are commonly on disk but invisible to go-git's storer
// (filtered out, or in a packfile not in go-git's index cache). Without
// this short-circuit, every File() would burn a multi-second network
// round-trip even though the blob is already local.
func (t *FetchingTree) File(path string) (*object.File, error) {
// Fast path: blob already available in go-git's storer.
file, err := t.inner.File(path)
if err == nil {
if file, err := t.inner.File(path); err == nil {
return file, nil
}

if t.fetch == nil {
return nil, err //nolint:wrapcheck // pass-through wrapper
}

// Find the tree entry to get the blob hash without resolving the blob.
// FindEntry only navigates tree objects (available after --filter=blob:none).
entry, findErr := t.inner.FindEntry(path)
if findErr != nil {
logging.Debug(t.ctx, "FetchingTree.File: entry not found",
slog.String("path", path),
slog.String("error", findErr.Error()),
)
return nil, err //nolint:wrapcheck // return original File() error
return nil, findErr //nolint:wrapcheck // return original error
}

logging.Debug(t.ctx, "FetchingTree.File: blob missing, fetching",
if file, gitErr := t.readFileViaGit(path, entry); gitErr == nil {
return file, nil
}

if t.fetch == nil {
return nil, fmt.Errorf("blob %s not available locally and no fetcher configured", entry.Hash.String()[:12])
}

logging.Debug(t.ctx, "FetchingTree.File: blob missing locally, fetching from remote",
slog.String("path", path),
slog.String("hash", entry.Hash.String()[:12]),
)

// Fetch the blob from the remote.
if fetchErr := t.fetch(t.ctx, []plumbing.Hash{entry.Hash}); fetchErr != nil {
logging.Warn(t.ctx, "FetchingTree.File: blob fetch failed",
slog.String("path", path),
slog.String("hash", entry.Hash.String()[:12]),
slog.String("error", fetchErr.Error()),
)
return nil, err //nolint:wrapcheck // return original File() error
return nil, fetchErr
}

// Try go-git again — works if blob was stored as a loose object.
file, err = t.inner.File(path)
if err == nil {
if file, err := t.inner.File(path); err == nil {
return file, nil
}

// go-git's storer caches the packfile index and won't see new packs
// created by external git commands. Fall back to "git cat-file" which
// reads directly from the on-disk object store.
logging.Debug(t.ctx, "FetchingTree.File: storer cache stale, reading via git cat-file",
slog.String("path", path),
slog.String("hash", entry.Hash.String()[:12]),
Expand Down Expand Up @@ -134,13 +135,21 @@ func (t *FetchingTree) PreFetch() (int, error) {
return len(missing), nil
}

// CollectMissingBlobs returns the hashes of every blob entry in this tree
// (recursively) that isn't present in the local object store. Useful for
// callers that want to decide whether network work is needed before
// running PreFetch (e.g., to avoid showing a spinner in fast no-op cases).
//
// The returned slice is nil when no blobs are missing.
func (t *FetchingTree) CollectMissingBlobs() []plumbing.Hash {
	// Delegate to the recursive walker, starting at the wrapped root tree.
	return t.collectMissingBlobs(t.inner)
}

// collectMissingBlobs recursively walks a tree and returns hashes of blob
// entries that are not present in the local object store.
func (t *FetchingTree) collectMissingBlobs(tree *object.Tree) []plumbing.Hash {
var missing []plumbing.Hash
for _, entry := range tree.Entries {
if entry.Mode.IsFile() {
if t.storer.HasEncodedObject(entry.Hash) != nil {
if t.storer.HasEncodedObject(entry.Hash) != nil && !t.blobOnDisk(entry.Hash) {
missing = append(missing, entry.Hash)
}
} else {
Expand All @@ -154,6 +163,16 @@ func (t *FetchingTree) collectMissingBlobs(tree *object.Tree) []plumbing.Hash {
return missing
}

// blobOnDisk reports whether the object identified by hash exists in the
// local on-disk object store, probed via `git cat-file -e <hash>`.
//
// This acts as a second opinion before deciding a blob must be fetched:
// in partial-clone repos a blob can be present on disk yet invisible to
// go-git's storer (filtered out, or sitting in a packfile absent from the
// cached index). Skipping those avoids a wasted network round-trip.
func (t *FetchingTree) blobOnDisk(hash plumbing.Hash) bool {
	probe := exec.CommandContext(t.ctx, "git", "cat-file", "-e", hash.String())
	err := probe.Run()
	return err == nil
}

// readFileViaGit reads a blob via "git cat-file -p <hash>" and returns an
// in-memory *object.File. This bypasses go-git's storer which may have a
// stale packfile index after external git commands fetched new objects.
Expand Down
20 changes: 14 additions & 6 deletions cmd/entire/cli/checkpoint/remote/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,21 +70,29 @@ func Fetch(ctx context.Context, opts FetchOptions) ([]byte, error) {
}

// FetchBlobs fetches specific objects (typically blobs) by hash from a remote.
// Unlike Fetch, this never applies --filter=blob:none (which would be
// contradictory — the point is to download specific blobs) and always uses
// --no-write-fetch-head to avoid polluting FETCH_HEAD.
// Uses `git fetch-pack` rather than `git fetch` because the high-level
// porcelain enforces partial-clone integrity checks that reject blob-only
// responses with "did not send all necessary objects". Plumbing skips those
// checks — it just downloads the requested objects into .git/objects/pack
// and exits — which is exactly what we want when grabbing individual blobs
// by SHA. Works against GitHub for any reachable object, including blobs.
//
// The remote should be a URL (not a remote name) to avoid persisting promisor
// settings onto the named remote. Use FetchURL to obtain the URL.
func FetchBlobs(ctx context.Context, remote string, hashes []string) error {
args := []string{"fetch", "--no-tags", "--no-write-fetch-head", remote}
args := []string{"fetch-pack", remote}
args = append(args, hashes...)

cmd := newCommand(ctx, args...)
disableTerminalPrompt(cmd)
_, err := cmd.CombinedOutput()
output, err := cmd.CombinedOutput()
if err != nil {
return fmt.Errorf("git fetch blobs: %w", err)
redactedURL := RedactURL(remote)
msg := strings.TrimSpace(strings.ReplaceAll(string(output), remote, redactedURL))
if msg != "" {
return fmt.Errorf("git fetch-pack from %s: %s: %w", redactedURL, msg, err)
}
return fmt.Errorf("git fetch-pack from %s: %w", redactedURL, err)
}
return nil
}
Expand Down
18 changes: 11 additions & 7 deletions cmd/entire/cli/checkpoint/v2_read.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ func (s *V2GitStore) ReadCommitted(ctx context.Context, checkpointID id.Checkpoi
return nil, nil //nolint:nilnil,nilerr // Checkpoint subtree not found
}

metadataFile, err := cpTree.File(paths.MetadataFileName)
cpFT := s.wrapWithFetcher(ctx, cpTree)
metadataFile, err := cpFT.File(paths.MetadataFileName)
if err != nil {
return nil, nil //nolint:nilnil,nilerr // metadata.json not found
}
Expand Down Expand Up @@ -165,7 +166,8 @@ func (s *V2GitStore) ReadSessionCompactTranscript(ctx context.Context, checkpoin
return nil, ErrCheckpointNotFound
}

compactFile, err := sessionTree.File(paths.CompactTranscriptFileName)
sessionFT := s.wrapWithFetcher(ctx, sessionTree)
compactFile, err := sessionFT.File(paths.CompactTranscriptFileName)
if err != nil {
return nil, ErrNoTranscript
}
Expand Down Expand Up @@ -214,23 +216,24 @@ func (s *V2GitStore) ReadSessionMetadataAndPrompts(ctx context.Context, checkpoi
}

result := &SessionContent{}
sessionFT := s.wrapWithFetcher(ctx, sessionTree)

if metadataFile, fileErr := sessionTree.File(paths.MetadataFileName); fileErr == nil {
if metadataFile, fileErr := sessionFT.File(paths.MetadataFileName); fileErr == nil {
if content, contentErr := metadataFile.Contents(); contentErr == nil {
if jsonErr := json.Unmarshal([]byte(content), &result.Metadata); jsonErr != nil {
return nil, fmt.Errorf("failed to parse session metadata: %w", jsonErr)
}
}
}

if file, fileErr := sessionTree.File(paths.PromptFileName); fileErr == nil {
if file, fileErr := sessionFT.File(paths.PromptFileName); fileErr == nil {
if content, contentErr := file.Contents(); contentErr == nil {
result.Prompts = content
}
}

// Read compact transcript from the same session tree (avoids a second tree walk).
if compactFile, fileErr := sessionTree.File(paths.CompactTranscriptFileName); fileErr == nil {
if compactFile, fileErr := sessionFT.File(paths.CompactTranscriptFileName); fileErr == nil {
if content, contentErr := compactFile.Contents(); contentErr == nil && content != "" {
result.Transcript = []byte(content)
}
Expand Down Expand Up @@ -273,16 +276,17 @@ func (s *V2GitStore) ReadSessionContent(ctx context.Context, checkpointID id.Che
}

result := &SessionContent{}
sessionFT := s.wrapWithFetcher(ctx, sessionTree)

if metadataFile, fileErr := sessionTree.File(paths.MetadataFileName); fileErr == nil {
if metadataFile, fileErr := sessionFT.File(paths.MetadataFileName); fileErr == nil {
if content, contentErr := metadataFile.Contents(); contentErr == nil {
if jsonErr := json.Unmarshal([]byte(content), &result.Metadata); jsonErr != nil {
return nil, fmt.Errorf("failed to parse session metadata: %w", jsonErr)
}
}
}

if file, fileErr := sessionTree.File(paths.PromptFileName); fileErr == nil {
if file, fileErr := sessionFT.File(paths.PromptFileName); fileErr == nil {
if content, contentErr := file.Contents(); contentErr == nil {
result.Prompts = content
}
Expand Down
22 changes: 22 additions & 0 deletions cmd/entire/cli/checkpoint/v2_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ type V2GitStore struct {
// fetching /full/* refs during entire resume). Defaults to "origin".
// Set to the checkpoint remote URL when checkpoint_remote is configured.
FetchRemote string

// blobFetcher fetches missing blobs by hash. When set, read paths wrap
// trees with FetchingTree so missing blobs are auto-recovered (and the
// cat-file fallback covers partial-clone-filtered blobs that go-git's
// storer can't see).
blobFetcher BlobFetchFunc
}

// maxCheckpoints returns the effective rotation threshold.
Expand All @@ -54,6 +60,22 @@ func NewV2GitStore(repo *git.Repository, fetchRemote string) *V2GitStore {
}
}

// SetBlobFetcher configures the store to automatically fetch missing blobs
// on demand when reading from /main trees. Mirrors GitStore.SetBlobFetcher.
// Required for reads against partial-clone repos where blobs may be absent
// or invisible to go-git's cached packfile index.
//
// Passing nil disables on-demand fetching; FetchingTree then falls back to
// its local-only resolution paths.
func (s *V2GitStore) SetBlobFetcher(f BlobFetchFunc) {
	s.blobFetcher = f
}

// wrapWithFetcher returns the input tree wrapped in a FetchingTree using
// the configured blob fetcher. Callers use the returned tree's File() /
// Tree() methods instead of the raw go-git ones so missing blobs are
// recovered via the fetcher and the cat-file fallback.
//
// Safe to call even when SetBlobFetcher was never invoked: the nil
// fetcher is passed through to NewFetchingTree, which handles it.
func (s *V2GitStore) wrapWithFetcher(ctx context.Context, tree *object.Tree) *FetchingTree {
	return NewFetchingTree(ctx, tree, s.repo.Storer, s.blobFetcher)
}

// ensureRef ensures that a custom ref exists, creating an orphan commit
// with an empty tree if it does not.
func (s *V2GitStore) ensureRef(ctx context.Context, refName plumbing.ReferenceName) error {
Expand Down
Loading
Loading