Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 25 additions & 13 deletions client/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,33 +25,45 @@ const SnapshotCommitHeader = "X-Cachew-Snapshot-Commit"
// mirror's current HEAD.
const BundleURLHeader = "X-Cachew-Bundle-Url"

// EnsureGitRefsRequest describes what the caller needs the server's mirror
// to contain. At least one of Refs or Commits must be non-empty.
//
// Both fields are dropped from the JSON encoding when empty.
type EnsureGitRefsRequest struct {
	// Refs maps each required ref (e.g. "refs/heads/main") to the SHA it is
	// expected to point at. An empty SHA means "require the ref to exist at
	// any SHA".
	Refs map[string]string `json:"refs,omitempty"`
	// Commits lists individual commit SHAs that must exist in the mirror's
	// object database, regardless of which ref points at them.
	Commits []string `json:"commits,omitempty"`
}

// EnsureGitRefsResponse is the response returned by EnsureGitRefs.
//
// Refs contains the resolved local SHA for each requested ref (empty if the
// ref is still missing on the server after the fetch). MissingCommits lists
// the requested commits that are still absent from the server's object
// database. Fetched reports whether the server performed an upstream fetch.
//
// Refs and MissingCommits are omitted from the JSON encoding when empty;
// Fetched is always encoded.
type EnsureGitRefsResponse struct {
	Refs           map[string]string `json:"refs,omitempty"`
	MissingCommits []string          `json:"missing_commits,omitempty"`
	Fetched        bool              `json:"fetched"`
}

// EnsureGitRefs asks the cachew server to ensure its local mirror of repoURL
// contains the listed refs at the given SHAs before the caller fetches. An
// empty SHA means "require the ref to exist, at any SHA". The server will
// synchronously fetch from upstream if any requested ref is missing or stale.
// satisfies the request before the caller fetches. The server synchronously
// fetches from upstream if any requested ref is missing/stale or any
// requested commit is absent from its object database.
//
// Use this before issuing a git fetch/clone against cachew when fresh refs
// are required and the default ref-check rate-limit window would otherwise
// allow stale refs to be served.
func (c *Client) EnsureGitRefs(ctx context.Context, repoURL string, refs map[string]string) (EnsureGitRefsResponse, error) {
// or specific commits are required and the default ref-check rate-limit
// window would otherwise allow stale data to be served.
func (c *Client) EnsureGitRefs(ctx context.Context, repoURL string, request EnsureGitRefsRequest) (EnsureGitRefsResponse, error) {
endpoint, err := gitEndpointURL(c.baseURL, repoURL, "ensure-refs")
if err != nil {
return EnsureGitRefsResponse{}, err
}

body, err := json.Marshal(struct {
Refs map[string]string `json:"refs"`
}{Refs: refs})
body, err := json.Marshal(request)
if err != nil {
return EnsureGitRefsResponse{}, errors.Wrap(err, "encode request")
}
Expand Down
44 changes: 38 additions & 6 deletions client/git_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,55 @@ func TestEnsureGitRefs(t *testing.T) {
assert.NoError(t, json.NewDecoder(r.Body).Decode(&receivedBody))
w.Header().Set("Content-Type", "application/json")
assert.NoError(t, json.NewEncoder(w).Encode(map[string]any{
"refs": map[string]string{"refs/heads/main": "abc123"},
"fetched": true,
"refs": map[string]string{"refs/heads/main": "abc123"},
"missing_commits": []string{"deadbeef"},
"fetched": true,
}))
}))
defer srv.Close()

api := client.NewWithHTTPClient(srv.URL, srv.Client())
resp, err := api.EnsureGitRefs(context.Background(),
"https://github.com/org/repo",
map[string]string{"refs/heads/main": ""})
client.EnsureGitRefsRequest{
Refs: map[string]string{"refs/heads/main": ""},
Commits: []string{"abc", "deadbeef"},
})
assert.NoError(t, err)
assert.True(t, resp.Fetched)
assert.Equal(t, "abc123", resp.Refs["refs/heads/main"])
assert.Equal(t, []string{"deadbeef"}, resp.MissingCommits)

refs, ok := receivedBody["refs"].(map[string]any)
assert.True(t, ok)
assert.Equal(t, "", refs["refs/heads/main"])

commits, ok := receivedBody["commits"].([]any)
assert.True(t, ok)
assert.Equal(t, 2, len(commits))
}

// TestEnsureGitRefsCommitsOnly sends a request that carries only commits and
// checks that the "refs" key is absent from the body the server receives, and
// that a response without "missing_commits" decodes to an empty slice.
func TestEnsureGitRefsCommitsOnly(t *testing.T) {
	var got map[string]any
	handler := func(w http.ResponseWriter, r *http.Request) {
		// Capture the decoded request body for the assertions below.
		assert.NoError(t, json.NewDecoder(r.Body).Decode(&got))
		w.Header().Set("Content-Type", "application/json")
		payload := map[string]any{
			"fetched": false,
		}
		assert.NoError(t, json.NewEncoder(w).Encode(payload))
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	api := client.NewWithHTTPClient(srv.URL, srv.Client())
	req := client.EnsureGitRefsRequest{Commits: []string{"abc"}}
	resp, err := api.EnsureGitRefs(context.Background(), "https://github.com/org/repo", req)
	assert.NoError(t, err)
	assert.False(t, resp.Fetched)
	assert.Equal(t, 0, len(resp.MissingCommits))

	_, present := got["refs"]
	assert.False(t, present, "refs field should be omitted when empty")
}

func TestEnsureGitRefsServerError(t *testing.T) {
Expand All @@ -52,18 +84,18 @@ func TestEnsureGitRefsServerError(t *testing.T) {
api := client.NewWithHTTPClient(srv.URL, srv.Client())
_, err := api.EnsureGitRefs(context.Background(),
"https://github.com/org/repo",
map[string]string{"refs/heads/main": ""})
client.EnsureGitRefsRequest{Refs: map[string]string{"refs/heads/main": ""}})
assert.Error(t, err)
assert.Contains(t, err.Error(), "status 400")
}

// TestEnsureGitRefsInvalidRepoURL checks that EnsureGitRefs returns an error
// for repository URLs it is expected to reject: a string that is not a URL
// and a URL with no repository path.
func TestEnsureGitRefsInvalidRepoURL(t *testing.T) {
	api := client.New("http://example.com", nil)

	_, err := api.EnsureGitRefs(context.Background(), "not-a-url", client.EnsureGitRefsRequest{})
	assert.Error(t, err)

	_, err = api.EnsureGitRefs(context.Background(), "https://github.com/", client.EnsureGitRefsRequest{})
	assert.Error(t, err)
}

Expand Down
89 changes: 61 additions & 28 deletions cmd/cachew/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@ type GitCmd struct {
}

// GitRestoreCmd fetches a git snapshot, extracts it, and optionally applies
// a delta bundle. If --ref or --commit is set it then asks the server to
// ensure those refs/commits are fresh and runs `git pull --ff-only` so the
// working tree catches up to upstream.
type GitRestoreCmd struct {
	RepoURL     string            `arg:"" help:"Repository URL (e.g. https://github.com/org/repo)."`
	Directory   string            `arg:"" help:"Target directory for the clone." type:"path"`
	Ref         map[string]string `help:"Required refs to freshen on the server before pulling, in the form 'name=sha' (e.g. 'refs/heads/main=abc123'). An empty SHA means any SHA is acceptable. Setting this (or --commit) runs a final 'git pull' from origin so the working tree is brought up to date."`
	Commit      []string          `help:"Required commit SHAs that must exist on the server, regardless of which ref points at them. May be repeated."`
	NoBundle    bool              `help:"Skip applying delta bundle."`
	ZstdThreads int               `help:"Threads for zstd decompression (0 = all CPU cores)." default:"0"`
}
Expand Down Expand Up @@ -61,9 +62,9 @@ func (c *GitRestoreCmd) Run(ctx context.Context, api *client.Client) error {

// Snapshot + bundle leave the working tree at whatever the mirror had
// when the bundle was last generated, which may be arbitrarily old. If
// the caller asked for specific refs to be fresh, freshen the mirror
// (if needed) and pull from origin (if needed) to catch up.
if len(c.Ref) > 0 {
// the caller asked for specific refs or commits to be fresh, freshen
// the mirror (if needed) and pull from origin (if needed) to catch up.
if len(c.Ref) > 0 || len(c.Commit) > 0 {
if err := c.satisfyRefs(ctx, api); err != nil {
return err
}
Expand All @@ -72,31 +73,47 @@ func (c *GitRestoreCmd) Run(ctx context.Context, api *client.Client) error {
return nil
}

// satisfyRefs ensures the working tree contains every requested ref. It
// short-circuits whenever the local clone already has what was asked for,
// avoiding both /ensure-refs and git pull when the snapshot+bundle already
// brought down the requested SHAs.
// satisfyRefs ensures the working tree contains every requested ref and
// commit. It short-circuits whenever the local clone already has what was
// asked for, avoiding both /ensure-refs and git pull when the snapshot+bundle
// already brought down the required SHAs.
func (c *GitRestoreCmd) satisfyRefs(ctx context.Context, api *client.Client) error {
// Fast path: if every ref is pinned to a specific SHA and the local
// clone already has all those commits, we're done.
if allPinned(c.Ref) && localHasAllCommits(ctx, c.Directory, c.Ref) {
fmt.Fprintf(os.Stderr, "All requested refs already present locally\n") //nolint:forbidigo
// Fast path: if every ref is pinned and the local clone has every ref
// SHA and every requested commit, we're done.
if allPinned(c.Ref) &&
localHasAllRefSHAs(ctx, c.Directory, c.Ref) &&
localHasAllSHAs(ctx, c.Directory, c.Commit) {
fmt.Fprintf(os.Stderr, "All requested refs/commits already present locally\n") //nolint:forbidigo
return nil
}

fmt.Fprintf(os.Stderr, "Ensuring %d ref(s) are fresh for %s\n", len(c.Ref), c.RepoURL) //nolint:forbidigo
resp, err := api.EnsureGitRefs(ctx, c.RepoURL, c.Ref)
fmt.Fprintf(os.Stderr, "Ensuring %d ref(s) and %d commit(s) are fresh for %s\n", //nolint:forbidigo
len(c.Ref), len(c.Commit), c.RepoURL)
resp, err := api.EnsureGitRefs(ctx, c.RepoURL, client.EnsureGitRefsRequest{
Refs: c.Ref,
Commits: c.Commit,
})
if err != nil {
return errors.Wrap(err, "ensure refs")
}
if resp.Fetched {
fmt.Fprintf(os.Stderr, "Server fetched fresh refs from upstream\n") //nolint:forbidigo
}

// If the server's resolved SHAs are already in our local clone (e.g.
// the bundle brought them in), there's nothing new to pull.
if len(resp.Refs) > 0 && localHasAllCommits(ctx, c.Directory, resp.Refs) {
fmt.Fprintf(os.Stderr, "Local clone already contains the server's resolved refs\n") //nolint:forbidigo
if len(resp.MissingCommits) > 0 {
return errors.Errorf("server is missing %d commit(s) after fetch: %v",
len(resp.MissingCommits), resp.MissingCommits)
}

// If the server's resolved SHAs and all requested commits are already
// in our local clone (e.g. the bundle brought them in), there's nothing
// new to pull. We only treat refs as "satisfied" when the server
// actually resolved them; an empty resp.Refs (e.g. unknown ref) leaves
// us no positive evidence, so fall through to the pull.
refsSatisfied := len(c.Ref) == 0 ||
(len(resp.Refs) == len(c.Ref) && localHasAllRefSHAs(ctx, c.Directory, resp.Refs))
commitsSatisfied := localHasAllSHAs(ctx, c.Directory, c.Commit)
if refsSatisfied && commitsSatisfied {
fmt.Fprintf(os.Stderr, "Local clone already contains the server's resolved refs and commits\n") //nolint:forbidigo
return nil
}

Expand All @@ -117,22 +134,38 @@ func allPinned(refs map[string]string) bool {
return true
}

// localHasAllCommits reports whether every non-empty SHA in refs exists in
// the working clone's object database. Refs with empty SHAs cause it to
// return false, since there's no SHA to look for.
func localHasAllCommits(ctx context.Context, directory string, refs map[string]string) bool {
// localHasAllRefSHAs reports whether every non-empty SHA in refs exists in
// the working clone's object database. An empty refs map returns true
// (nothing to check); a ref with an empty SHA causes it to return false
// because we don't know what to look for.
func localHasAllRefSHAs(ctx context.Context, directory string, refs map[string]string) bool {
for _, sha := range refs {
if sha == "" {
return false
}
// #nosec G204 - directory and sha are controlled by us
if err := exec.CommandContext(ctx, "git", "-C", directory, "cat-file", "-e", sha).Run(); err != nil {
if !localHasSHA(ctx, directory, sha) {
return false
}
}
return true
}

// localHasAllSHAs returns true only when localHasSHA succeeds for each entry
// of shas in the clone at directory; it stops at the first miss. With no
// entries there is nothing to check and the result is true.
func localHasAllSHAs(ctx context.Context, directory string, shas []string) bool {
	for i := range shas {
		if localHasSHA(ctx, directory, shas[i]) {
			continue
		}
		return false
	}
	return true
}

// localHasSHA reports whether `git -C directory cat-file -e sha` exits
// successfully. Any failure to run the command (missing object, directory
// that is not a repository, cancelled ctx, git unavailable) yields false.
func localHasSHA(ctx context.Context, directory, sha string) bool {
	// #nosec G204 - directory and sha are controlled by us
	probe := exec.CommandContext(ctx, "git", "-C", directory, "cat-file", "-e", sha)
	err := probe.Run()
	return err == nil
}

func applyBundle(ctx context.Context, api *client.Client, bundleURL, directory string) error {
body, err := api.OpenGitBundle(ctx, bundleURL)
if err != nil {
Expand Down
76 changes: 75 additions & 1 deletion cmd/cachew/git_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func createTarZst(t *testing.T, dir string) []byte {
return buf.Bytes()
}

func gitRevParse(t *testing.T, dir, ref string) string {
func gitRevParse(t *testing.T, dir, ref string) string { //nolint:unparam // helper accepts any ref
t.Helper()
out, err := exec.Command("git", "-C", dir, "rev-parse", ref).Output() //nolint:gosec
assert.NoError(t, err)
Expand Down Expand Up @@ -328,6 +328,80 @@ func TestGitRestoreSkipsEnsureRefsWhenLocalHasSHA(t *testing.T) {
assert.False(t, ensureCalled.Load(), "ensure-refs should be skipped when local clone has the requested SHA")
}

// TestGitRestoreSkipsEnsureRefsWhenLocalHasCommit restores from a snapshot
// whose HEAD is the very commit requested via Commit, and asserts that the
// /ensure-refs endpoint is never hit.
func TestGitRestoreSkipsEnsureRefsWhenLocalHasCommit(t *testing.T) {
	// Build a one-commit repository and package it as the snapshot payload.
	repoDir := t.TempDir()
	initGitRepo(t, repoDir, map[string]string{"file.txt": "v1"})
	headSHA := gitRevParse(t, repoDir, "HEAD")
	snapshot := createTarZst(t, repoDir)

	var ensureHit atomic.Bool
	handler := func(w http.ResponseWriter, r *http.Request) {
		path := r.URL.Path
		if strings.HasSuffix(path, "/snapshot.tar.zst") {
			w.Header().Set("Content-Type", "application/zstd")
			w.Write(snapshot) //nolint:errcheck
			return
		}
		if strings.HasSuffix(path, "/ensure-refs") {
			// Record the call and fail loudly: this path must stay unused.
			ensureHit.Store(true)
			http.Error(w, "should not be called", http.StatusInternalServerError)
			return
		}
		http.NotFound(w, r)
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	api := client.NewWithHTTPClient(srv.URL, srv.Client())
	restore := &GitRestoreCmd{
		RepoURL:   "https://github.com/test/repo",
		Directory: filepath.Join(t.TempDir(), "restored"),
		Commit:    []string{headSHA},
		NoBundle:  true,
	}
	assert.NoError(t, restore.Run(context.Background(), api))
	assert.False(t, ensureHit.Load(), "ensure-refs should be skipped when local clone has the requested commit")
}

// TestGitRestoreCommitMissingAfterFetchIsFatal requests a commit the snapshot
// does not contain and has the fake server report it in "missing_commits".
// Run must then fail with an error mentioning "server is missing".
func TestGitRestoreCommitMissingAfterFetchIsFatal(t *testing.T) {
	const absentSHA = "0000000000000000000000000000000000000000"

	repoDir := t.TempDir()
	initGitRepo(t, repoDir, map[string]string{"file.txt": "v1"})
	snapshot := createTarZst(t, repoDir)

	handler := func(w http.ResponseWriter, r *http.Request) {
		path := r.URL.Path
		if strings.HasSuffix(path, "/snapshot.tar.zst") {
			w.Header().Set("Content-Type", "application/zstd")
			w.Write(snapshot) //nolint:errcheck
			return
		}
		if strings.HasSuffix(path, "/ensure-refs") {
			// Claim a fetch happened but the commit is still unavailable.
			w.Header().Set("Content-Type", "application/json")
			reply := map[string]any{
				"missing_commits": []string{absentSHA},
				"fetched":         true,
			}
			_ = json.NewEncoder(w).Encode(reply) //nolint:errcheck
			return
		}
		http.NotFound(w, r)
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	api := client.NewWithHTTPClient(srv.URL, srv.Client())
	restore := &GitRestoreCmd{
		RepoURL:   "https://github.com/test/repo",
		Directory: filepath.Join(t.TempDir(), "restored"),
		Commit:    []string{absentSHA},
		NoBundle:  true,
	}
	err := restore.Run(context.Background(), api)
	assert.Error(t, err)
	assert.Contains(t, err.Error(), "server is missing")
}

func TestGitRestoreSkipsPullWhenLocalHasResolvedSHA(t *testing.T) {
srcDir := t.TempDir()
initGitRepo(t, srcDir, map[string]string{"file.txt": "v1"})
Expand Down
Loading