diff --git a/internal/gitclone/manager.go b/internal/gitclone/manager.go index 576904b..ab44a02 100644 --- a/internal/gitclone/manager.go +++ b/internal/gitclone/manager.go @@ -357,25 +357,16 @@ func (r *Repository) WithFetchExclusion(ctx context.Context, fn func() error) er } } -// MarkRestored configures a restored snapshot (e.g. from S3) as a mirror. -// The caller must have already transitioned to StateCloning (via -// TryStartCloning) before extracting the snapshot. On error the state is -// left as StateCloning so the caller can fall back to a fresh clone. -func (r *Repository) MarkRestored(ctx context.Context) error { - r.mu.Lock() - if r.state == StateReady { - r.mu.Unlock() - return nil - } - r.state = StateCloning - r.mu.Unlock() - - err := configureMirror(ctx, r.path, r.config.PackThreads) - if err == nil && r.config.Maintenance { - err = registerMaintenance(ctx, r.path) +// ConfigureMirror configures a git directory at repoPath as a mirror with +// the repository's pack threads and optional maintenance settings. +func (r *Repository) ConfigureMirror(ctx context.Context, repoPath string) error { + if err := configureMirror(ctx, repoPath, r.config.PackThreads); err != nil { + return errors.Wrap(err, "configure mirror") } - if err != nil { - return errors.Wrap(err, "configure mirror after restore") + if r.config.Maintenance { + if err := registerMaintenance(ctx, repoPath); err != nil { + return errors.Wrap(err, "register maintenance") + } } return nil } @@ -483,10 +474,21 @@ func configureMirror(ctx context.Context, repoPath string, packThreads int) erro const CloneTimeout = 30 * time.Minute func (r *Repository) executeClone(ctx context.Context) error { - if err := os.MkdirAll(filepath.Dir(r.path), 0o750); err != nil { + parentDir := filepath.Dir(r.path) + if err := os.MkdirAll(parentDir, 0o750); err != nil { return errors.Wrap(err, "create clone directory") } + tmpDir, err := os.MkdirTemp(parentDir, ".clone-*") + if err != nil { + return errors.Wrap(err, "create temp clone directory") + } + defer os.RemoveAll(tmpDir) //nolint:errcheck // best-effort cleanup on failure + + // git clone --mirror creates a directory inside tmpDir; we point it at a + // known subdirectory so we can rename it atomically afterwards. + cloneDest := filepath.Join(tmpDir, "repo") + cloneCtx, cancel := context.WithTimeout(ctx, CloneTimeout) defer cancel() @@ -495,11 +497,11 @@ func (r *Repository) executeClone(ctx context.Context) error { // repos the server-side pack computation can take minutes at near-zero // transfer rate, which would trip the speed check. The cloneTimeout // provides the overall safety net instead. - // #nosec G204 - r.upstreamURL and r.path are controlled by us + // #nosec G204 - r.upstreamURL and cloneDest are controlled by us args := []string{ "clone", "--mirror", "-c", "http.postBuffer=" + strconv.Itoa(config.PostBuffer), - r.upstreamURL, r.path, + r.upstreamURL, cloneDest, } cmd, err := r.gitCommand(cloneCtx, args...) @@ -515,16 +517,13 @@ func (r *Repository) executeClone(ctx context.Context) error { return errors.Wrapf(err, "git clone --mirror: %s", string(output)) } - if err := configureMirror(ctx, r.path, r.config.PackThreads); err != nil { - return errors.Wrap(err, "configure mirror") + if err := r.ConfigureMirror(ctx, cloneDest); err != nil { + return errors.WithStack(err) } - if r.config.Maintenance { - if err := registerMaintenance(ctx, r.path); err != nil { - return errors.Wrap(err, "register maintenance") - } + if err := os.Rename(cloneDest, r.path); err != nil { + return errors.Wrap(err, "move clone into place") } - return nil } diff --git a/internal/gitclone/manager_test.go b/internal/gitclone/manager_test.go index aad30a8..310205b 100644 --- a/internal/gitclone/manager_test.go +++ b/internal/gitclone/manager_test.go @@ -326,6 +326,52 @@ func TestRepository_CloneSetsMirrorConfig(t *testing.T) { } } +func TestRepository_CloneFailedLeavesNoDebris(t *testing.T) { + ctx := context.Background() + tmpDir := t.TempDir() + + clonePath := filepath.Join(tmpDir, "mirrors", "github.com", "owner", "repo") + repo := &Repository{ + state: StateEmpty, + path: clonePath, + upstreamURL: "https://github.com/nonexistent-owner-abc123/nonexistent-repo-abc123", + fetchSem: make(chan struct{}, 1), + } + repo.fetchSem <- struct{}{} + + err := repo.Clone(ctx) + assert.Error(t, err) + assert.Equal(t, StateEmpty, repo.State()) + + _, statErr := os.Stat(clonePath) + assert.True(t, os.IsNotExist(statErr), "repo.Path() should not exist after failed clone") +} + +func TestRepository_CloneDoesNotClobberSiblings(t *testing.T) { + ctx := context.Background() + tmpDir := t.TempDir() + + mirrorRoot := filepath.Join(tmpDir, "mirrors") + siblingPath := filepath.Join(mirrorRoot, "github.com", "owner", "sibling") + assert.NoError(t, os.MkdirAll(siblingPath, 0o755)) + assert.NoError(t, os.WriteFile(filepath.Join(siblingPath, "HEAD"), []byte("ref: refs/heads/main\n"), 0o644)) + + clonePath := filepath.Join(mirrorRoot, "github.com") + repo := &Repository{ + state: StateEmpty, + path: clonePath, + upstreamURL: "https://github.com/", + fetchSem: make(chan struct{}, 1), + } + repo.fetchSem <- struct{}{} + + err := repo.Clone(ctx) + assert.Error(t, err) + + _, statErr := os.Stat(siblingPath) + assert.NoError(t, statErr, "sibling mirror should still exist after failed clone") +} + func TestRepository_Repack(t *testing.T) { _, ctx := logging.Configure(t.Context(), logging.Config{Level: slog.LevelError}) tmpDir := t.TempDir() diff --git a/internal/strategy/git/git.go b/internal/strategy/git/git.go index a457743..c17372f 100644 --- a/internal/strategy/git/git.go +++ b/internal/strategy/git/git.go @@ -537,29 +537,41 @@ func (s *Strategy) startClone(ctx context.Context, repo *gitclone.Repository) { // tryRestoreSnapshot attempts to restore a mirror from an S3 mirror snapshot. // Mirror snapshots are bare repositories that can be extracted and used directly -// without any conversion. On failure the repo path is cleaned up so the caller -// can fall back to clone. +// without any conversion. The snapshot is extracted into a temporary directory +// and renamed into place only on success, so a failure can never delete an +// existing mirror directory. func (s *Strategy) tryRestoreSnapshot(ctx context.Context, repo *gitclone.Repository) error { cacheKey := mirrorSnapshotCacheKey(repo.UpstreamURL()) - if err := os.MkdirAll(filepath.Dir(repo.Path()), 0o750); err != nil { + parentDir := filepath.Dir(repo.Path()) + if err := os.MkdirAll(parentDir, 0o750); err != nil { return errors.Wrap(err, "create parent directory for restore") } + tmpDir, err := os.MkdirTemp(parentDir, ".restore-*") + if err != nil { + return errors.Wrap(err, "create temp restore directory") + } + defer os.RemoveAll(tmpDir) //nolint:errcheck // best-effort cleanup on failure + + restoreDest := filepath.Join(tmpDir, "repo") + logger := logging.FromContext(ctx) - if err := snapshot.Restore(ctx, s.cache, cacheKey, repo.Path(), s.config.ZstdThreads); err != nil { - _ = os.RemoveAll(repo.Path()) + if err := snapshot.Restore(ctx, s.cache, cacheKey, restoreDest, s.config.ZstdThreads); err != nil { return errors.Wrap(err, "restore mirror snapshot") } - logger.InfoContext(ctx, "Mirror snapshot extracted", "upstream", repo.UpstreamURL(), "path", repo.Path()) + logger.InfoContext(ctx, "Mirror snapshot extracted", "upstream", repo.UpstreamURL(), "path", restoreDest) + + if err := repo.ConfigureMirror(ctx, restoreDest); err != nil { + return errors.Wrap(err, "configure restored mirror") + } - if err := repo.MarkRestored(ctx); err != nil { - _ = os.RemoveAll(repo.Path()) - return errors.Wrap(err, "mark restored") + if err := os.Rename(restoreDest, repo.Path()); err != nil { + return errors.Wrap(err, "move restored snapshot into place") } - logger.InfoContext(ctx, "Repository marked as restored", "upstream", repo.UpstreamURL(), "state", repo.State()) + logger.InfoContext(ctx, "Repository restored from snapshot", "upstream", repo.UpstreamURL()) return nil } diff --git a/internal/strategy/git/snapshot.go b/internal/strategy/git/snapshot.go index c3ccac3..7d8acf0 100644 --- a/internal/strategy/git/snapshot.go +++ b/internal/strategy/git/snapshot.go @@ -352,7 +352,7 @@ func (s *Strategy) createBundle(ctx context.Context, repo *gitclone.Repository, // No read lock needed: git bundle create reads objects through git's own // file-level locking, safe to run concurrently with fetches. headRef := "HEAD" - if out, err := exec.CommandContext(ctx, "git", "-C", repo.Path(), "symbolic-ref", "HEAD").Output(); err == nil { // #nosec G204 G702 + if out, err := exec.CommandContext(ctx, "git", "-C", repo.Path(), "symbolic-ref", "HEAD").Output(); err == nil { //nolint:gosec // repo.Path() is controlled by us headRef = strings.TrimSpace(string(out)) }