diff --git a/README.md b/README.md index 9984ccd..103cdbd 100644 --- a/README.md +++ b/README.md @@ -467,3 +467,4 @@ Compared to the typical workflow, there's no need to name branches, run `git add | 5 | Invalid arguments or flags | | 6 | Disambiguation required (branch belongs to multiple stacks) | | 7 | Rebase already in progress | +| 8 | Stack is locked by another process | diff --git a/cmd/add.go b/cmd/add.go index 9cb8dd0..8015c65 100644 --- a/cmd/add.go +++ b/cmd/add.go @@ -200,8 +200,7 @@ func runAdd(cfg *config.Config, opts *addOptions, args []string) error { } if err := stack.Save(gitDir, sf); err != nil { - cfg.Errorf("failed to save stack state: %s", err) - return ErrSilent + return handleSaveError(cfg, err) } // Print summary diff --git a/cmd/init.go b/cmd/init.go index 24e3dae..7c002d2 100644 --- a/cmd/init.go +++ b/cmd/init.go @@ -327,7 +327,7 @@ func runInit(cfg *config.Config, opts *initOptions) error { syncStackPRs(cfg, &sf.Stacks[len(sf.Stacks)-1]) if err := stack.Save(gitDir, sf); err != nil { - return err + return handleSaveError(cfg, err) } // Print result diff --git a/cmd/merge.go b/cmd/merge.go index 74c452d..ce5bf8e 100644 --- a/cmd/merge.go +++ b/cmd/merge.go @@ -44,7 +44,7 @@ func runMerge(cfg *config.Config, target string) error { syncStackPRs(cfg, s) // Persist the refreshed PR state. - _ = stack.Save(result.GitDir, result.StackFile) + stack.SaveNonBlocking(result.GitDir, result.StackFile) // Resolve which branch to operate on. var br *stack.BranchRef diff --git a/cmd/push.go b/cmd/push.go index eb08f46..5e47f74 100644 --- a/cmd/push.go +++ b/cmd/push.go @@ -180,8 +180,7 @@ func runPush(cfg *config.Config, opts *pushOptions) error { syncStackPRs(cfg, s) if err := stack.Save(gitDir, sf); err != nil { - cfg.Errorf("failed to save stack state: %s", err) - return ErrSilent + return handleSaveError(cfg, err) } cfg.Successf("Pushed and synced %d branches", len(s.ActiveBranches())) diff --git a/cmd/rebase.go b/cmd/rebase.go index 406f78d..b9f8d52 100644 --- a/cmd/rebase.go +++ b/cmd/rebase.go @@ -305,7 +305,7 @@ func runRebase(cfg *config.Config, opts *rebaseOptions) error { syncStackPRs(cfg, s) - _ = stack.Save(gitDir, sf) + stack.SaveNonBlocking(gitDir, sf) merged := s.MergedBranches() if len(merged) > 0 { @@ -496,7 +496,7 @@ func continueRebase(cfg *config.Config, gitDir string) error { syncStackPRs(cfg, s) - _ = stack.Save(gitDir, sf) + stack.SaveNonBlocking(gitDir, sf) cfg.Printf("All branches in stack rebased locally with %s", s.Trunk.Branch) cfg.Printf("To push up your changes and open/update the stack of PRs, run `%s`", diff --git a/cmd/sync.go b/cmd/sync.go index 840f6dd..52c8184 100644 --- a/cmd/sync.go +++ b/cmd/sync.go @@ -221,7 +221,7 @@ func runSync(cfg *config.Config, opts *syncOptions) error { } else { // Persist refreshed PR state even on conflict, then bail out // before pushing or reporting success. - _ = stack.Save(gitDir, sf) + stack.SaveNonBlocking(gitDir, sf) return ErrConflict } } @@ -288,8 +288,7 @@ func runSync(cfg *config.Config, opts *syncOptions) error { updateBaseSHAs(s) if err := stack.Save(gitDir, sf); err != nil { - cfg.Errorf("failed to save stack state: %s", err) - return ErrSilent + return handleSaveError(cfg, err) } cfg.Printf("") diff --git a/cmd/unstack.go b/cmd/unstack.go index a8c6a12..0be869c 100644 --- a/cmd/unstack.go +++ b/cmd/unstack.go @@ -50,8 +50,7 @@ func runUnstack(cfg *config.Config, opts *unstackOptions) error { // Remove from local tracking sf.RemoveStackForBranch(target) if err := stack.Save(gitDir, sf); err != nil { - cfg.Errorf("failed to save stack state: %s", err) - return ErrSilent + return handleSaveError(cfg, err) } cfg.Successf("Stack removed from local tracking") diff --git a/cmd/utils.go b/cmd/utils.go index df0a407..815b8a8 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -26,6 +26,7 @@ var ( ErrInvalidArgs = &ExitError{Code: 5} // invalid arguments or flags ErrDisambiguate = &ExitError{Code: 6} // multiple stacks/remotes, can't auto-select ErrRebaseActive = &ExitError{Code: 7} // rebase already in progress + ErrLockFailed = &ExitError{Code: 8} // could not acquire stack file lock ) // ExitError is returned by commands to indicate a specific exit code. @@ -77,6 +78,9 @@ type loadStackResult struct { // branch, calls resolveStack (which may prompt for disambiguation), checks for // a nil stack, and re-reads the current branch (in case disambiguation caused // a checkout). Errors are printed via cfg and returned. +// +// loadStack does NOT acquire the stack file lock. The lock is acquired +// automatically by stack.Save() when writing. func loadStack(cfg *config.Config, branch string) (*loadStackResult, error) { gitDir, err := git.GitDir() if err != nil { @@ -133,6 +137,24 @@ func loadStack(cfg *config.Config, branch string) (*loadStackResult, error) { }, nil } +// handleSaveError translates a stack.Save error into the appropriate user +// message and exit error. Lock contention and stale-file detection both +// return ErrLockFailed (exit 8); other write failures return ErrSilent (exit 1). +func handleSaveError(cfg *config.Config, err error) error { + var lockErr *stack.LockError + if errors.As(err, &lockErr) { + cfg.Errorf("another process is currently editing the stack — try again later") + return ErrLockFailed + } + var staleErr *stack.StaleError + if errors.As(err, &staleErr) { + cfg.Errorf("stack file was modified by another process — please re-run the command") + return ErrLockFailed + } + cfg.Errorf("failed to save stack state: %s", err) + return ErrSilent +} + // resolveStack finds the stack for the given branch, handling ambiguity when // a branch (typically a trunk) belongs to multiple stacks. If exactly one // stack matches, it is returned directly. If multiple stacks match, the user diff --git a/cmd/view.go b/cmd/view.go index 11116ac..fb4cc47 100644 --- a/cmd/view.go +++ b/cmd/view.go @@ -50,9 +50,9 @@ func runView(cfg *config.Config, opts *viewOptions) error { s := result.Stack currentBranch := result.CurrentBranch - // Sync PR state + // Sync PR state and save (best-effort). syncStackPRs(cfg, s) - _ = stack.Save(gitDir, sf) + stack.SaveNonBlocking(gitDir, sf) if opts.asJSON { return viewJSON(cfg, s, currentBranch) diff --git a/internal/stack/lock.go b/internal/stack/lock.go new file mode 100644 index 0000000..02a9dd9 --- /dev/null +++ b/internal/stack/lock.go @@ -0,0 +1,89 @@ +package stack + +import ( + "fmt" + "os" + "path/filepath" + "time" +) + +const lockFileName = "gh-stack.lock" + +// LockError is returned when the stack file lock cannot be acquired. +// Callers can check for this with errors.As to distinguish lock failures +// from other errors. +type LockError struct { + Err error +} + +func (e *LockError) Error() string { return e.Err.Error() } +func (e *LockError) Unwrap() error { return e.Err } + +// StaleError is returned when the stack file was modified on disk since it +// was loaded. This indicates another process wrote to the file concurrently. +// Callers can check for this with errors.As. +type StaleError struct { + Err error +} + +func (e *StaleError) Error() string { return e.Err.Error() } +func (e *StaleError) Unwrap() error { return e.Err } + +// LockTimeout is how long Lock() will wait for the exclusive lock before +// giving up. With the lock held only during file writes (milliseconds), +// this timeout primarily guards against a hung process holding the lock. +var LockTimeout = 5 * time.Second + +// lockRetryInterval is the sleep between non-blocking lock attempts. +const lockRetryInterval = 100 * time.Millisecond + +// FileLock provides an exclusive advisory lock on the stack file to prevent +// concurrent writes between multiple gh-stack processes. +type FileLock struct { + f *os.File +} + +// Lock acquires an exclusive lock on the stack file in the given git directory. +// It retries with a non-blocking attempt every 100ms for up to LockTimeout. +// +// Most callers should not use Lock directly — stack.Save() acquires the lock +// automatically. Use Lock only when you need to hold the lock across multiple +// operations (e.g. Load-Modify-Save as an atomic unit). +func Lock(gitDir string) (*FileLock, error) { + path := filepath.Join(gitDir, lockFileName) + f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0644) + if err != nil { + return nil, fmt.Errorf("opening lock file: %w", err) + } + + deadline := time.Now().Add(LockTimeout) + for { + err := tryLockFile(f) + if err == nil { + return &FileLock{f: f}, nil + } + if !isLockBusy(err) { + // Unexpected error (e.g. bad fd) — don't retry. + f.Close() + return nil, fmt.Errorf("locking stack file: %w", err) + } + if time.Now().After(deadline) { + f.Close() + return nil, &LockError{Err: fmt.Errorf( + "timed out waiting for stack lock after %s — another gh-stack process may be running", LockTimeout)} + } + time.Sleep(lockRetryInterval) + } +} + +// Unlock releases the lock. The lock file is intentionally left on disk to +// avoid a race where another process opens the same path, blocks on flock, +// then wakes up holding a lock on an unlinked inode while a third process +// creates a new file and locks a different inode. +func (l *FileLock) Unlock() { + if l == nil || l.f == nil { + return + } + unlockFile(l.f) + l.f.Close() +} diff --git a/internal/stack/lock_test.go b/internal/stack/lock_test.go new file mode 100644 index 0000000..6afefc6 --- /dev/null +++ b/internal/stack/lock_test.go @@ -0,0 +1,244 @@ +package stack + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestLock_Basic(t *testing.T) { + dir := t.TempDir() + + lock, err := Lock(dir) + require.NoError(t, err) + require.NotNil(t, lock) + + lock.Unlock() +} + +func TestLock_NilUnlockSafe(t *testing.T) { + // Unlock on nil should not panic. + var lock *FileLock + lock.Unlock() +} + +func TestLock_BlocksUntilReleased(t *testing.T) { + dir := t.TempDir() + + lock1, err := Lock(dir) + require.NoError(t, err) + + acquired := make(chan struct{}) + errCh := make(chan error, 1) + go func() { + lock2, err := Lock(dir) + if err != nil { + errCh <- err + return + } + close(acquired) + lock2.Unlock() + }() + + // lock2 should be blocked while lock1 is held. + select { + case <-acquired: + t.Fatal("lock2 acquired while lock1 was still held") + case err := <-errCh: + t.Fatalf("lock2 failed: %v", err) + case <-time.After(300 * time.Millisecond): + // expected — lock2 is waiting + } + + lock1.Unlock() + + // After releasing lock1, lock2 should acquire promptly. + select { + case <-acquired: + // success + case err := <-errCh: + t.Fatalf("lock2 failed after lock1 released: %v", err) + case <-time.After(5 * time.Second): + t.Fatal("lock2 did not acquire after lock1 was released") + } +} + +func TestLock_SerializesConcurrentAccess(t *testing.T) { + dir := t.TempDir() + + // Write an initial stack file with 0 stacks. + sf := &StackFile{SchemaVersion: 1, Stacks: []Stack{}} + require.NoError(t, Save(dir, sf)) + + // Run 10 concurrent goroutines, each adding a stack under lock. + // Uses Lock + Load + writeStackFile for atomic read-modify-write. + errCh := make(chan error, 10) + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func(idx int) { + defer wg.Done() + + lock, err := Lock(dir) + if err != nil { + errCh <- fmt.Errorf("goroutine %d Lock: %w", idx, err) + return + } + defer lock.Unlock() + + loaded, err := Load(dir) + if err != nil { + errCh <- fmt.Errorf("goroutine %d Load: %w", idx, err) + return + } + + loaded.AddStack(makeStack("main", "branch")) + if err := writeStackFile(dir, loaded); err != nil { + errCh <- fmt.Errorf("goroutine %d writeStackFile: %w", idx, err) + } + }(i) + } + wg.Wait() + close(errCh) + + for err := range errCh { + t.Error(err) + } + + // All 10 stacks should be present — no lost updates. + final, err := Load(dir) + require.NoError(t, err) + assert.Len(t, final.Stacks, 10, "all concurrent writes should be preserved") +} + +func TestLock_FileLeftOnDisk(t *testing.T) { + dir := t.TempDir() + + lock, err := Lock(dir) + require.NoError(t, err) + lock.Unlock() + + // Lock file should still exist after unlock (no os.Remove race). + _, err = os.Stat(filepath.Join(dir, lockFileName)) + require.NoError(t, err, "lock file should remain on disk after unlock") + + lock2, err := Lock(dir) + require.NoError(t, err, "should be able to re-lock after unlock") + lock2.Unlock() +} + +func TestLock_TimesOut(t *testing.T) { + dir := t.TempDir() + + // Hold the lock so the second attempt can never acquire it. + lock1, err := Lock(dir) + require.NoError(t, err) + defer lock1.Unlock() + + // Save original timeout and set a short one for the test. + origTimeout := LockTimeout + LockTimeout = 200 * time.Millisecond + defer func() { LockTimeout = origTimeout }() + + start := time.Now() + lock2, err := Lock(dir) + elapsed := time.Since(start) + + assert.Nil(t, lock2, "should not acquire lock") + require.Error(t, err) + + var lockErr *LockError + require.True(t, errors.As(err, &lockErr), "error should be *LockError, got %T", err) + assert.Contains(t, lockErr.Error(), "timed out") + + // Should have waited roughly LockTimeout before giving up. + assert.GreaterOrEqual(t, elapsed, 150*time.Millisecond, "should wait near the timeout") +} + +func TestSave_DetectsStaleFile(t *testing.T) { + dir := t.TempDir() + + // Write an initial stack file. + sf := &StackFile{SchemaVersion: 1, Stacks: []Stack{}} + require.NoError(t, Save(dir, sf)) + + // Load — captures the on-disk checksum. + loaded, err := Load(dir) + require.NoError(t, err) + + // Simulate another process: load, modify, save. + other, err := Load(dir) + require.NoError(t, err) + other.AddStack(makeStack("main", "sneaky")) + require.NoError(t, Save(dir, other)) + + // Our loaded copy tries to save — should detect staleness. + loaded.AddStack(makeStack("main", "my-branch")) + err = Save(dir, loaded) + require.Error(t, err) + + var staleErr *StaleError + require.True(t, errors.As(err, &staleErr), "error should be *StaleError, got %T", err) + assert.Contains(t, staleErr.Error(), "modified by another process") +} + +func TestSave_AllowsWriteWhenFileUnchanged(t *testing.T) { + dir := t.TempDir() + + // Write, load, modify, save — no concurrent changes. + sf := &StackFile{SchemaVersion: 1, Stacks: []Stack{}} + require.NoError(t, Save(dir, sf)) + + loaded, err := Load(dir) + require.NoError(t, err) + + loaded.AddStack(makeStack("main", "feature")) + require.NoError(t, Save(dir, loaded)) + + // Verify the write actually persisted. + final, err := Load(dir) + require.NoError(t, err) + assert.Len(t, final.Stacks, 1) +} + +func TestSave_AllowsFirstWrite(t *testing.T) { + dir := t.TempDir() + + // File doesn't exist — Load returns nil checksum, Save should succeed. + sf, err := Load(dir) + require.NoError(t, err) + assert.Empty(t, sf.Stacks) + + sf.AddStack(makeStack("main", "first")) + require.NoError(t, Save(dir, sf), "first save to a new file should succeed") + + final, err := Load(dir) + require.NoError(t, err) + assert.Len(t, final.Stacks, 1) +} + +func TestSave_DoubleSaveSucceeds(t *testing.T) { + dir := t.TempDir() + + sf, err := Load(dir) + require.NoError(t, err) + + sf.AddStack(makeStack("main", "first")) + require.NoError(t, Save(dir, sf), "first save should succeed") + + // A second Save on the same instance must not spuriously fail — + // writeStackFile refreshes loadChecksum after writing. + sf.AddStack(makeStack("main", "second")) + require.NoError(t, Save(dir, sf), "second save on same instance should succeed") + + final, err := Load(dir) + require.NoError(t, err) + assert.Len(t, final.Stacks, 2) +} diff --git a/internal/stack/lock_unix.go b/internal/stack/lock_unix.go new file mode 100644 index 0000000..c157961 --- /dev/null +++ b/internal/stack/lock_unix.go @@ -0,0 +1,23 @@ +//go:build !windows + +package stack + +import ( + "os" + "syscall" +) + +// tryLockFile attempts a non-blocking exclusive lock. +// Returns nil on success, or an error if the lock is held by another process. +func tryLockFile(f *os.File) error { + return syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB) +} + +// isLockBusy reports whether err indicates the lock is held by another process. +func isLockBusy(err error) bool { + return err == syscall.EWOULDBLOCK +} + +func unlockFile(f *os.File) { + _ = syscall.Flock(int(f.Fd()), syscall.LOCK_UN) +} diff --git a/internal/stack/lock_windows.go b/internal/stack/lock_windows.go new file mode 100644 index 0000000..0b7d033 --- /dev/null +++ b/internal/stack/lock_windows.go @@ -0,0 +1,37 @@ +//go:build windows + +package stack + +import ( + "errors" + "os" + + "golang.org/x/sys/windows" +) + +// tryLockFile attempts a non-blocking exclusive lock. +// Returns nil on success, or an error if the lock is held by another process. +func tryLockFile(f *os.File) error { + ol := new(windows.Overlapped) + return windows.LockFileEx( + windows.Handle(f.Fd()), + windows.LOCKFILE_EXCLUSIVE_LOCK|windows.LOCKFILE_FAIL_IMMEDIATELY, + 0, // reserved + 1, // lock 1 byte + 0, // high word + ol, + ) +} + +// isLockBusy reports whether err indicates the lock is held by another process. +func isLockBusy(err error) bool { + return errors.Is(err, windows.ERROR_LOCK_VIOLATION) +} + +func unlockFile(f *os.File) { + ol := new(windows.Overlapped) + _ = windows.UnlockFileEx( + windows.Handle(f.Fd()), + 0, 1, 0, ol, + ) +} diff --git a/internal/stack/stack.go b/internal/stack/stack.go index 4dca5a6..5405c7a 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -1,6 +1,8 @@ package stack import ( + "bytes" + "crypto/sha256" "encoding/json" "errors" "fmt" @@ -168,6 +170,11 @@ type StackFile struct { SchemaVersion int `json:"schemaVersion"` Repository string `json:"repository"` Stacks []Stack `json:"stacks"` + + // loadChecksum is the SHA-256 of the raw file bytes at Load time. + // Save uses it to detect concurrent modifications (optimistic concurrency). + // nil means the file did not exist when loaded. + loadChecksum []byte } // FindAllStacksForBranch returns all stacks that contain the given branch. @@ -233,11 +240,14 @@ func stackFilePath(gitDir string) string { // Load reads the stack file from the given git directory. // Returns an empty StackFile if the file does not exist. +// The returned StackFile records a checksum of the on-disk content so that +// Save can detect concurrent modifications. func Load(gitDir string) (*StackFile, error) { path := stackFilePath(gitDir) data, err := os.ReadFile(path) if err != nil { if errors.Is(err, os.ErrNotExist) { + // loadChecksum stays nil — sentinel for "file absent at load time". return &StackFile{ SchemaVersion: schemaVersion, Stacks: []Stack{}, @@ -255,11 +265,87 @@ func Load(gitDir string) (*StackFile, error) { return nil, fmt.Errorf("stack file has schema version %d, but this version of gh-stack only supports up to version %d — please upgrade gh-stack", sf.SchemaVersion, schemaVersion) } + sum := sha256.Sum256(data) + sf.loadChecksum = sum[:] return &sf, nil } -// Save writes the stack file to the given git directory. +// Save acquires an exclusive lock on the stack file, verifies the file hasn't +// been modified since Load (optimistic concurrency), writes sf as JSON, and +// releases the lock. The lock is held only for the read-compare-write window. +// Returns *LockError if the lock times out, or *StaleError if another process +// modified the file since it was loaded. func Save(gitDir string, sf *StackFile) error { + lock, err := Lock(gitDir) + if err != nil { + return err // *LockError for contention, plain error for I/O failures + } + defer lock.Unlock() + + if err := checkStale(gitDir, sf); err != nil { + return err + } + return writeStackFile(gitDir, sf) +} + +// SaveNonBlocking attempts to save without blocking. If another process holds +// the lock or the file was modified since Load, the save is silently skipped. +// Use this for best-effort metadata persistence (e.g. syncing PR state in view). +func SaveNonBlocking(gitDir string, sf *StackFile) { + path := filepath.Join(gitDir, lockFileName) + f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0644) + if err != nil { + return + } + if tryLockFile(f) != nil { + f.Close() + return + } + lock := &FileLock{f: f} + defer lock.Unlock() + + if checkStale(gitDir, sf) != nil { + return + } + _ = writeStackFile(gitDir, sf) +} + +// checkStale compares the current on-disk content against the checksum +// captured at Load time. Returns *StaleError if the file was modified +// by another process. The caller must hold the lock. +func checkStale(gitDir string, sf *StackFile) error { + path := stackFilePath(gitDir) + data, err := os.ReadFile(path) + + if errors.Is(err, os.ErrNotExist) { + // File absent on disk. + if sf.loadChecksum == nil { + return nil // was absent at Load time too — no conflict + } + // File existed at Load but is now gone. Allow the write to + // recreate it rather than erroring; this is not a lost-update. + return nil + } + if err != nil { + return fmt.Errorf("reading stack file for staleness check: %w", err) + } + + // File exists on disk. + if sf.loadChecksum == nil { + // File was absent at Load but another process created it. + return &StaleError{Err: fmt.Errorf( + "stack file was created by another process since it was loaded")} + } + + sum := sha256.Sum256(data) + if !bytes.Equal(sf.loadChecksum, sum[:]) { + return &StaleError{Err: fmt.Errorf( + "stack file was modified by another process since it was loaded")} + } + return nil +} + +func writeStackFile(gitDir string, sf *StackFile) error { sf.SchemaVersion = schemaVersion if sf.Stacks == nil { sf.Stacks = []Stack{} @@ -272,5 +358,9 @@ func Save(gitDir string, sf *StackFile) error { if err := os.WriteFile(path, data, 0644); err != nil { return fmt.Errorf("writing stack file: %w", err) } + // Refresh checksum so a second Save on the same StackFile doesn't + // spuriously fail the staleness check. + sum := sha256.Sum256(data) + sf.loadChecksum = sum[:] return nil } diff --git a/skills/gh-stack/SKILL.md b/skills/gh-stack/SKILL.md index 482b98b..4dd113a 100644 --- a/skills/gh-stack/SKILL.md +++ b/skills/gh-stack/SKILL.md @@ -732,6 +732,7 @@ gh stack unstack feature-auth --local | 5 | Invalid arguments | Fix the command invocation (check flags and arguments) | | 6 | Disambiguation required | A branch belongs to multiple stacks. Run `gh stack checkout ` to switch to a non-shared branch first | | 7 | Rebase already in progress | Run `gh stack rebase --continue` (after resolving conflicts) or `gh stack rebase --abort` to start over | +| 8 | Stack is locked | Another `gh stack` process is writing the stack file. Wait and retry — the lock times out after 5 seconds | ## Known limitations