diff --git a/docs/env-variables.md b/docs/env-variables.md index 6b5bd28c..e49e2a62 100644 --- a/docs/env-variables.md +++ b/docs/env-variables.md @@ -28,7 +28,7 @@ | `--git-clone-depth` | `ENVBUILDER_GIT_CLONE_DEPTH` | | The depth to use when cloning the Git repository. | | `--git-clone-single-branch` | `ENVBUILDER_GIT_CLONE_SINGLE_BRANCH` | | Clone only a single branch of the Git repository. | | `--git-clone-thinpack` | `ENVBUILDER_GIT_CLONE_THINPACK` | `true` | Git clone with thin pack compatibility enabled, ensuring that even when thin pack compatibility is activated,it will not be turned on for the domain dev.azure.com. | -| `--git-clone-submodules` | `ENVBUILDER_GIT_CLONE_SUBMODULES` | | Clone Git submodules after cloning the repository. Accepts 'true' (max depth 10), 'false' (disabled), or a positive integer for max recursion depth. | +| `--git-clone-submodules` | `ENVBUILDER_GIT_CLONE_SUBMODULES` | | Clone Git submodules after cloning the repository. Accepts 'true' (max depth 10), 'false' (disabled), or a non-negative integer for max recursion depth (0 disables, max 100). | | `--git-username` | `ENVBUILDER_GIT_USERNAME` | | The username to use for Git authentication. This is optional. | | `--git-password` | `ENVBUILDER_GIT_PASSWORD` | | The password to use for Git authentication. This is optional. | | `--git-ssh-private-key-path` | `ENVBUILDER_GIT_SSH_PRIVATE_KEY_PATH` | | Path to an SSH private key to be used for Git authentication. If this is set, then GIT_SSH_PRIVATE_KEY_BASE64 cannot be set. | diff --git a/git/git.go b/git/git.go index 74eaea56..a5fcdfe1 100644 --- a/git/git.go +++ b/git/git.go @@ -17,6 +17,7 @@ import ( "github.com/coder/envbuilder/options" "github.com/go-git/go-billy/v5" + billyutil "github.com/go-git/go-billy/v5/util" "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/config" "github.com/go-git/go-git/v5/plumbing" @@ -108,8 +109,7 @@ func CloneRepo(ctx context.Context, logf func(string, ...any), opts CloneRepoOpt return false, fmt.Errorf("chroot .git: %w", err) } gitStorage := filesystem.NewStorage(gitDir, cache.NewObjectLRU(cache.DefaultMaxSize*10)) - fsStorage := filesystem.NewStorage(fs, cache.NewObjectLRU(cache.DefaultMaxSize*10)) - repo, err := git.Open(fsStorage, gitDir) + repo, err := git.Open(gitStorage, fs) if errors.Is(err, git.ErrRepositoryNotExists) { err = nil } @@ -133,7 +133,7 @@ func CloneRepo(ctx context.Context, logf func(string, ...any), opts CloneRepoOpt if errors.Is(err, git.ErrRepositoryAlreadyExists) { // The repository was created between our Open and CloneContext // calls. Reopen it so submodule initialization can still run. - repo, err = git.Open(fsStorage, gitDir) + repo, err = git.Open(gitStorage, fs) if err != nil { return false, fmt.Errorf("reopen existing %q: %w", opts.RepoURL, err) } @@ -453,25 +453,17 @@ var scpLikeURLRegex = regexp.MustCompile(`^([^@]+)@([^:]+):(.+)$`) // extractHost extracts the host from a URL, handling both standard URLs and SCP-like URLs. // Returns empty string if host cannot be determined. func extractHost(u string) string { - // Try standard URL parsing first - if parsed, err := url.Parse(u); err == nil && parsed.Host != "" { - // Remove port if present - host := parsed.Hostname() - return strings.ToLower(host) + ep, err := transport.NewEndpoint(u) + if err != nil || ep.Host == "" { + return "" } - - // Handle SCP-like URLs: user@host:path - if matches := scpLikeURLRegex.FindStringSubmatch(u); matches != nil { - return strings.ToLower(matches[2]) - } - - return "" + return strings.ToLower(ep.Host) } // SameHost checks if two URLs point to the same host. // Used to determine if credentials should be forwarded to submodules. -// The comparison is hostname-only. Port is ignored to match git's -// credential-helper convention, which keys credentials on the host alone. +// The comparison is hostname-only. Port is ignored as a simplification; +// submodules on the same host at different ports are uncommon. func SameHost(url1, url2 string) bool { host1 := extractHost(url1) host2 := extractHost(url2) @@ -605,6 +597,7 @@ func initSubmodules(ctx context.Context, logf func(string, ...any), repo *git.Re } logf("Parent repository URL: %s", RedactURL(effectiveParentURL)) + var warnings int for _, sub := range subs { select { case <-ctx.Done(): @@ -650,11 +643,13 @@ func initSubmodules(ctx context.Context, logf func(string, ...any), repo *git.Re subRepo, subWorktree, err := openSubmoduleRepo(parentWorktree, subConfig.Path) if err != nil { logf(" ⚠ Could not open submodule repository %s for nested traversal: %v", subConfig.Name, err) + warnings++ continue } nestedSubs, err := subWorktree.Submodules() if err != nil { logf(" ⚠ Could not list nested submodules in %s: %v", subConfig.Name, err) + warnings++ continue } if len(nestedSubs) == 0 { @@ -666,7 +661,11 @@ func initSubmodules(ctx context.Context, logf func(string, ...any), repo *git.Re } } - logf("✓ All submodules initialized successfully") + if warnings > 0 { + logf("⚠ Submodule initialization finished with %d warning(s)", warnings) + } else { + logf("✓ All submodules initialized successfully") + } return nil } @@ -674,6 +673,13 @@ func initSubmodules(ctx context.Context, logf func(string, ...any), repo *git.Re // returns parentAuth if the submodule shares the parent's host, and nil // otherwise. A warning is logged when parent auth is set but withheld // because the hosts differ. +// +// The check is host-only and does not compare transport protocols. If +// the parent uses SSH auth and a submodule on the same host uses HTTPS, +// the SSH auth is forwarded and go-git rejects it at the transport +// layer. This is intentional: returning nil here would silently skip +// auth for a submodule that legitimately needs it under a different +// protocol. func submoduleAuthFor(logf func(string, ...any), parentURL, submoduleURL string, parentAuth transport.AuthMethod) transport.AuthMethod { if parentAuth == nil { return nil @@ -723,15 +729,7 @@ func cloneSubmodule(ctx context.Context, logf func(string, ...any), parentWorktr // Check if already cloned if _, statErr := subFS.Stat(".git"); statErr == nil { logf(" Submodule already cloned, checking out expected commit...") - // Open the existing repository - subGitDir, err := subFS.Chroot(".git") - if err != nil { - return fmt.Errorf("chroot to existing .git: %w", err) - } - subRepo, err := git.Open( - filesystem.NewStorage(subGitDir, cache.NewObjectLRU(cache.DefaultMaxSize*10)), - subFS, - ) + subRepo, _, err := openSubmoduleRepo(parentWorktree, subConfig.Path) if err != nil { return fmt.Errorf("open existing submodule: %w", err) } @@ -768,6 +766,9 @@ func cloneSubmodule(ctx context.Context, logf func(string, ...any), parentWorktr NoCheckout: true, }) if err != nil { + if rmErr := billyutil.RemoveAll(subFS, ".git"); rmErr != nil { + logf(" ⚠ Failed to clean up .git after clone failure: %v", rmErr) + } return fmt.Errorf("clone submodule repository: %w", err) } @@ -796,7 +797,7 @@ func checkoutSubmoduleCommit(ctx context.Context, logf func(string, ...any), sub }) if fetchErr != nil && !errors.Is(fetchErr, git.NoErrAlreadyUpToDate) { // If that fails, try fetching all refs - logf(" Direct fetch failed, fetching all refs...") + logf(" Direct fetch failed (%v), fetching all refs...", fetchErr) fetchAllErr := subRepo.FetchContext(ctx, &git.FetchOptions{ RemoteName: "origin", Auth: submoduleAuth, diff --git a/git/git_test.go b/git/git_test.go index 5c97ed57..c172f582 100644 --- a/git/git_test.go +++ b/git/git_test.go @@ -94,6 +94,9 @@ func TestCloneRepo(t *testing.T) { // We do not overwrite a repo if one is already present. t.Run("AlreadyCloned", func(t *testing.T) { + if !tc.expectClone { + t.Skip("cannot test already-cloned when the initial clone is expected to fail") + } srvFS := memfs.New() _ = gittest.NewRepo(t, srvFS, gittest.Commit(t, "README.md", "Hello, world!", "Wow!")) authMW := mwtest.BasicAuthMW(tc.srvUsername, tc.srvPassword) @@ -102,12 +105,24 @@ func TestCloneRepo(t *testing.T) { tc.mungeURL(&srv.URL) } clientFS := memfs.New() - // A repo already exists! - _ = gittest.NewRepo(t, clientFS) + // Clone once so the repo exists in the layout CloneRepo expects. cloned, err := git.CloneRepo(context.Background(), t.Logf, git.CloneRepoOptions{ Path: "/", RepoURL: srv.URL, Storage: clientFS, + RepoAuth: &githttp.BasicAuth{ + Username: tc.username, + Password: tc.password, + }, + }) + require.NoError(t, err) + require.True(t, cloned) + + // Second call: repo already exists, must be a no-op. + cloned, err = git.CloneRepo(context.Background(), t.Logf, git.CloneRepoOptions{ + Path: "/", + RepoURL: srv.URL, + Storage: clientFS, }) require.NoError(t, err) require.False(t, cloned) @@ -909,6 +924,84 @@ func TestCloneOptionsFromOptions_Submodules(t *testing.T) { require.Equal(t, 10, cloneOpts.SubmoduleDepth) } +// Regression: git.Open must find a repo that CloneRepo previously +// created. Before the fix, the storer and worktree arguments were +// swapped, so Open looked for HEAD at the worktree root instead of +// .git/ and returned ErrRepositoryNotExists on every restart. +func TestCloneRepo_RestartFindsExistingRepo(t *testing.T) { + t.Parallel() + + srvFS := memfs.New() + _ = gittest.NewRepo(t, srvFS, gittest.Commit(t, "README.md", "hello", "init")) + srv := httptest.NewServer(mwtest.BasicAuthMW("", "")(gittest.NewServer(srvFS))) + t.Cleanup(srv.Close) + + clientFS := memfs.New() + cloned, err := git.CloneRepo(context.Background(), t.Logf, git.CloneRepoOptions{ + Path: "/workspace", + RepoURL: srv.URL, + Storage: clientFS, + }) + require.NoError(t, err) + require.True(t, cloned) + + // Second call simulates a workspace restart. The repo exists on + // disk; CloneRepo must detect it and return (false, nil). + cloned, err = git.CloneRepo(context.Background(), t.Logf, git.CloneRepoOptions{ + Path: "/workspace", + RepoURL: srv.URL, + Storage: clientFS, + }) + require.NoError(t, err) + require.False(t, cloned, "second CloneRepo call must detect the existing repo") +} + +// Regression: a failed submodule clone must not leave behind a .git +// directory that blocks all future retries. Before the fix, MkdirAll +// created .git before CloneContext, and a clone failure left it in +// place. The next start's Stat(".git") found it, entered the +// already-cloned path, and failed permanently. +func TestCloneRepo_SubmoduleCloneFailureAllowsRetry(t *testing.T) { + t.Parallel() + + // Submodule server that rejects all requests. + subSrv := httptest.NewServer(mwtest.BasicAuthMW("secret", "secret")(gittest.NewServer(memfs.New()))) + t.Cleanup(subSrv.Close) + + // Parent server with a submodule pointing at the rejecting server. + subFS := memfs.New() + subRepo := gittest.NewRepo(t, subFS, gittest.Commit(t, "f.txt", "x", "init")) + subHead, err := subRepo.Head() + require.NoError(t, err) + + parentFS := memfs.New() + _ = gittest.NewRepo(t, parentFS, + gittest.Commit(t, "README.md", "hello", "init"), + gittest.CommitSubmodule(t, "sub", subSrv.URL, subHead.Hash()), + ) + parentSrv := httptest.NewServer(mwtest.BasicAuthMW("", "")(gittest.NewServer(parentFS))) + t.Cleanup(parentSrv.Close) + + clientFS := memfs.New() + + // First attempt: clone succeeds but submodule init fails (auth). + _, err = git.CloneRepo(context.Background(), t.Logf, git.CloneRepoOptions{ + Path: "/workspace", + RepoURL: parentSrv.URL, + Storage: clientFS, + SubmoduleDepth: 1, + }) + require.Error(t, err, "submodule clone should fail") + + // The submodule's .git must have been cleaned up. + workFS, err := clientFS.Chroot("/workspace") + require.NoError(t, err) + subModFS, err := workFS.Chroot("sub") + require.NoError(t, err) + _, statErr := subModFS.Stat(".git") + require.Error(t, statErr, ".git must not exist after failed clone") +} + // generates a random ed25519 private key func randKeygen(t *testing.T) gossh.Signer { t.Helper() diff --git a/git/submodule_auth_internal_test.go b/git/submodule_auth_internal_test.go index f9c0b959..618f077f 100644 --- a/git/submodule_auth_internal_test.go +++ b/git/submodule_auth_internal_test.go @@ -2,6 +2,7 @@ package git import ( "bytes" + "fmt" "testing" "github.com/go-git/go-git/v5/plumbing/transport" @@ -59,7 +60,7 @@ func TestSubmoduleAuthFor(t *testing.T) { t.Run(c.name, func(t *testing.T) { t.Parallel() var buf bytes.Buffer - logf := func(format string, _ ...any) { _, _ = buf.WriteString(format) } + logf := func(format string, args ...any) { fmt.Fprintf(&buf, format, args...) } got := submoduleAuthFor(logf, c.parentURL, c.submoduleURL, c.parentAuth) require.Equal(t, c.wantAuth, got) if c.wantWarn { diff --git a/options/options.go b/options/options.go index 508d1d00..69048b59 100644 --- a/options/options.go +++ b/options/options.go @@ -14,10 +14,13 @@ import ( ) // SubmoduleDepth is a custom type for handling submodule depth that accepts -// "true" (defaults to 10), "false" (0), or a positive integer. +// "true" (defaults to 10), "false" (0), or a non-negative integer (0 disables). type SubmoduleDepth int -const DefaultSubmoduleDepth = 10 +const ( + DefaultSubmoduleDepth SubmoduleDepth = 10 + MaxSubmoduleDepth SubmoduleDepth = 100 +) func (s *SubmoduleDepth) Set(val string) error { lower := strings.ToLower(strings.TrimSpace(val)) @@ -31,11 +34,14 @@ func (s *SubmoduleDepth) Set(val string) error { } n, err := strconv.Atoi(lower) if err != nil { - return fmt.Errorf("invalid submodule depth %q: must be true, false, or a positive integer", val) + return fmt.Errorf("invalid submodule depth %q: must be true, false, or a non-negative integer", val) } if n < 0 { return fmt.Errorf("submodule depth must be non-negative, got %d", n) } + if n > int(MaxSubmoduleDepth) { + return fmt.Errorf("submodule depth must be at most %d, got %d", MaxSubmoduleDepth, n) + } *s = SubmoduleDepth(n) return nil } @@ -149,9 +155,9 @@ type Options struct { // GitCloneThinPack clone with thin pack compabilities. This is optional. GitCloneThinPack bool // GitCloneSubmoduleDepth controls submodule initialization after cloning. - // 0 = disabled (default), positive integer = max recursion depth. + // 0 = disabled (default), >0 = max recursion depth (capped at MaxSubmoduleDepth). // The flag accepts "true" (defaults to DefaultSubmoduleDepth), "false" - // (0), or a positive integer for the max recursion depth. + // (0), or a non-negative integer (0 disables). GitCloneSubmoduleDepth int // GitUsername is the username to use for Git authentication. This is // optional. @@ -436,7 +442,7 @@ func (o *Options) CLI() serpent.OptionSet { Env: WithEnvPrefix("GIT_CLONE_SUBMODULES"), Value: SubmoduleDepthOf(&o.GitCloneSubmoduleDepth), Description: "Clone Git submodules after cloning the repository. " + - "Accepts 'true' (max depth 10), 'false' (disabled), or a positive integer for max recursion depth.", + "Accepts 'true' (max depth 10), 'false' (disabled), or a non-negative integer for max recursion depth (0 disables, max 100).", }, { Flag: "git-username", diff --git a/options/testdata/options.golden b/options/testdata/options.golden index 6c086d56..918b3dde 100644 --- a/options/testdata/options.golden +++ b/options/testdata/options.golden @@ -101,8 +101,8 @@ OPTIONS: --git-clone-submodules submodule-depth, $ENVBUILDER_GIT_CLONE_SUBMODULES Clone Git submodules after cloning the repository. Accepts 'true' (max - depth 10), 'false' (disabled), or a positive integer for max recursion - depth. + depth 10), 'false' (disabled), or a non-negative integer for max + recursion depth (0 disables, max 100). --git-clone-thinpack bool, $ENVBUILDER_GIT_CLONE_THINPACK (default: true) Git clone with thin pack compatibility enabled, ensuring that even