From e568d7f00503b7c9aa94401b8416f79f63a61edb Mon Sep 17 00:00:00 2001 From: Sameen Karim Date: Tue, 24 Mar 2026 15:12:05 -0400 Subject: [PATCH 1/6] add file locking to prevent concurrent stack file edits --- cmd/add.go | 1 + cmd/init.go | 6 ++++ cmd/merge.go | 1 + cmd/navigate.go | 2 ++ cmd/push.go | 6 ++++ cmd/rebase.go | 7 ++++ cmd/sync.go | 1 + cmd/unstack.go | 1 + cmd/utils.go | 16 ++++++++++ cmd/view.go | 1 + internal/stack/lock.go | 52 ++++++++++++++++++++++++++++++ internal/stack/lock_test.go | 58 ++++++++++++++++++++++++++++++++++ internal/stack/lock_unix.go | 16 ++++++++++ internal/stack/lock_windows.go | 30 ++++++++++++++++++ 14 files changed, 198 insertions(+) create mode 100644 internal/stack/lock.go create mode 100644 internal/stack/lock_test.go create mode 100644 internal/stack/lock_unix.go create mode 100644 internal/stack/lock_windows.go diff --git a/cmd/add.go b/cmd/add.go index 9cb8dd0..d377440 100644 --- a/cmd/add.go +++ b/cmd/add.go @@ -53,6 +53,7 @@ func runAdd(cfg *config.Config, opts *addOptions, args []string) error { if err != nil { return ErrNotInStack } + defer result.Lock.Unlock() gitDir := result.GitDir sf := result.StackFile s := result.Stack diff --git a/cmd/init.go b/cmd/init.go index 24e3dae..22138c7 100644 --- a/cmd/init.go +++ b/cmd/init.go @@ -73,6 +73,12 @@ func runInit(cfg *config.Config, opts *initOptions) error { } // Load existing stack file + lock, err := stack.Lock(gitDir) + if err != nil { + cfg.Warningf("could not acquire stack lock: %s", err) + } + defer lock.Unlock() + sf, err := stack.Load(gitDir) if err != nil { cfg.Errorf("failed to load stack state: %s", err) diff --git a/cmd/merge.go b/cmd/merge.go index 74c452d..5d9667e 100644 --- a/cmd/merge.go +++ b/cmd/merge.go @@ -37,6 +37,7 @@ func runMerge(cfg *config.Config, target string) error { if err != nil { return ErrNotInStack } + defer result.Lock.Unlock() s := result.Stack currentBranch := result.CurrentBranch diff --git a/cmd/navigate.go b/cmd/navigate.go index aad9c7b..cb8168e 100644 --- a/cmd/navigate.go +++ b/cmd/navigate.go @@ -73,6 +73,7 @@ func runNavigate(cfg *config.Config, delta int) error { if err != nil { return ErrNotInStack } + defer result.Lock.Unlock() s := result.Stack currentBranch := result.CurrentBranch @@ -187,6 +188,7 @@ func runNavigateToEnd(cfg *config.Config, top bool) error { if err != nil { return ErrNotInStack } + defer result.Lock.Unlock() s := result.Stack currentBranch := result.CurrentBranch diff --git a/cmd/push.go b/cmd/push.go index eb08f46..b85ccde 100644 --- a/cmd/push.go +++ b/cmd/push.go @@ -45,6 +45,12 @@ func runPush(cfg *config.Config, opts *pushOptions) error { return ErrNotInStack } + lock, err := stack.Lock(gitDir) + if err != nil { + cfg.Warningf("could not acquire stack lock: %s", err) + } + defer lock.Unlock() + sf, err := stack.Load(gitDir) if err != nil { cfg.Errorf("failed to load stack state: %s", err) diff --git a/cmd/rebase.go b/cmd/rebase.go index 406f78d..b7431af 100644 --- a/cmd/rebase.go +++ b/cmd/rebase.go @@ -86,6 +86,7 @@ func runRebase(cfg *config.Config, opts *rebaseOptions) error { if err != nil { return ErrNotInStack } + defer result.Lock.Unlock() sf := result.StackFile s := result.Stack currentBranch := result.CurrentBranch @@ -337,6 +338,12 @@ func continueRebase(cfg *config.Config, gitDir string) error { return ErrSilent } + lock, err := stack.Lock(gitDir) + if err != nil { + cfg.Warningf("could not acquire stack lock: %s", err) + } + defer lock.Unlock() + sf, err := stack.Load(gitDir) if err != nil { cfg.Errorf("failed to load stack state: %s", err) diff --git a/cmd/sync.go b/cmd/sync.go index 840f6dd..b3e2f9b 100644 --- a/cmd/sync.go +++ b/cmd/sync.go @@ -49,6 +49,7 @@ func runSync(cfg *config.Config, opts *syncOptions) error { if err != nil { return ErrNotInStack } + defer result.Lock.Unlock() gitDir := result.GitDir sf := result.StackFile s := result.Stack diff --git a/cmd/unstack.go b/cmd/unstack.go index a8c6a12..e9b2da5 100644 --- a/cmd/unstack.go +++ b/cmd/unstack.go @@ -37,6 +37,7 @@ func runUnstack(cfg *config.Config, opts *unstackOptions) error { if err != nil { return ErrNotInStack } + defer result.Lock.Unlock() gitDir := result.GitDir sf := result.StackFile s := result.Stack diff --git a/cmd/utils.go b/cmd/utils.go index df0a407..305ba12 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -70,6 +70,7 @@ type loadStackResult struct { StackFile *stack.StackFile Stack *stack.Stack CurrentBranch string + Lock *stack.FileLock // caller must defer Lock.Unlock() if non-nil } // loadStack is the standard way to obtain a Stack for the current (or given) @@ -84,6 +85,19 @@ func loadStack(cfg *config.Config, branch string) (*loadStackResult, error) { return nil, fmt.Errorf("not a git repository") } + lock, err := stack.Lock(gitDir) + if err != nil { + cfg.Warningf("could not acquire stack lock: %s", err) + // Proceed without lock — better than failing entirely. + } + // Release the lock if we fail before returning it to the caller. + success := false + defer func() { + if !success && lock != nil { + lock.Unlock() + } + }() + sf, err := stack.Load(gitDir) if err != nil { cfg.Errorf("failed to load stack state: %s", err) @@ -125,11 +139,13 @@ func loadStack(cfg *config.Config, branch string) (*loadStackResult, error) { return nil, fmt.Errorf("failed to get current branch: %w", err) } + success = true return &loadStackResult{ GitDir: gitDir, StackFile: sf, Stack: s, CurrentBranch: currentBranch, + Lock: lock, }, nil } diff --git a/cmd/view.go b/cmd/view.go index 11116ac..e59e3de 100644 --- a/cmd/view.go +++ b/cmd/view.go @@ -45,6 +45,7 @@ func runView(cfg *config.Config, opts *viewOptions) error { if err != nil { return ErrNotInStack } + defer result.Lock.Unlock() gitDir := result.GitDir sf := result.StackFile s := result.Stack diff --git a/internal/stack/lock.go b/internal/stack/lock.go new file mode 100644 index 0000000..3d9dc61 --- /dev/null +++ b/internal/stack/lock.go @@ -0,0 +1,52 @@ +package stack + +import ( + "fmt" + "os" + "path/filepath" +) + +const lockFileName = "gh-stack.lock" + +// FileLock provides an exclusive advisory lock on the stack file to prevent +// concurrent Load-Modify-Save races between multiple gh-stack processes. +type FileLock struct { + f *os.File + path string +} + +// Lock acquires an exclusive lock on the stack file in the given git directory. +// Callers must defer Unlock() to release the lock. +// +// Usage: +// +// lock, err := stack.Lock(gitDir) +// if err != nil { ... } +// defer lock.Unlock() +// sf, err := stack.Load(gitDir) +// // ... modify sf ... +// stack.Save(gitDir, sf) +func Lock(gitDir string) (*FileLock, error) { + path := filepath.Join(gitDir, lockFileName) + f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0644) + if err != nil { + return nil, fmt.Errorf("opening lock file: %w", err) + } + + if err := lockFile(f); err != nil { + f.Close() + return nil, fmt.Errorf("acquiring lock: %w", err) + } + + return &FileLock{f: f, path: path}, nil +} + +// Unlock releases the lock and removes the lock file. +func (l *FileLock) Unlock() { + if l == nil || l.f == nil { + return + } + unlockFile(l.f) + l.f.Close() + os.Remove(l.path) +} diff --git a/internal/stack/lock_test.go b/internal/stack/lock_test.go new file mode 100644 index 0000000..49e5ebd --- /dev/null +++ b/internal/stack/lock_test.go @@ -0,0 +1,58 @@ +package stack + +import ( + "sync" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestLock_Basic(t *testing.T) { + dir := t.TempDir() + + lock, err := Lock(dir) + require.NoError(t, err) + require.NotNil(t, lock) + + lock.Unlock() +} + +func TestLock_NilUnlockSafe(t *testing.T) { + // Unlock on nil should not panic. + var lock *FileLock + lock.Unlock() +} + +func TestLock_SerializesConcurrentAccess(t *testing.T) { + dir := t.TempDir() + + // Write an initial stack file with 0 stacks. + sf := &StackFile{SchemaVersion: 1, Stacks: []Stack{}} + require.NoError(t, Save(dir, sf)) + + // Run 10 concurrent goroutines, each adding a stack under lock. + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func(idx int) { + defer wg.Done() + + lock, err := Lock(dir) + require.NoError(t, err) + defer lock.Unlock() + + loaded, err := Load(dir) + require.NoError(t, err) + + loaded.AddStack(makeStack("main", "branch")) + require.NoError(t, Save(dir, loaded)) + }(i) + } + wg.Wait() + + // All 10 stacks should be present — no lost updates. + final, err := Load(dir) + require.NoError(t, err) + assert.Len(t, final.Stacks, 10, "all concurrent writes should be preserved") +} diff --git a/internal/stack/lock_unix.go b/internal/stack/lock_unix.go new file mode 100644 index 0000000..a4014b3 --- /dev/null +++ b/internal/stack/lock_unix.go @@ -0,0 +1,16 @@ +//go:build !windows + +package stack + +import ( + "os" + "syscall" +) + +func lockFile(f *os.File) error { + return syscall.Flock(int(f.Fd()), syscall.LOCK_EX) +} + +func unlockFile(f *os.File) { + _ = syscall.Flock(int(f.Fd()), syscall.LOCK_UN) +} diff --git a/internal/stack/lock_windows.go b/internal/stack/lock_windows.go new file mode 100644 index 0000000..efae1d1 --- /dev/null +++ b/internal/stack/lock_windows.go @@ -0,0 +1,30 @@ +//go:build windows + +package stack + +import ( + "os" + + "golang.org/x/sys/windows" +) + +func lockFile(f *os.File) error { + // Lock the first byte exclusively (blocks until available). + ol := new(windows.Overlapped) + return windows.LockFileEx( + windows.Handle(f.Fd()), + windows.LOCKFILE_EXCLUSIVE_LOCK, + 0, // reserved + 1, // lock 1 byte + 0, // high word + ol, + ) +} + +func unlockFile(f *os.File) { + ol := new(windows.Overlapped) + _ = windows.UnlockFileEx( + windows.Handle(f.Fd()), + 0, 1, 0, ol, + ) +} From 211e563d1f7991219202251c850a53ee3064275b Mon Sep 17 00:00:00 2001 From: Sameen Karim Date: Wed, 25 Mar 2026 20:06:18 -0400 Subject: [PATCH 2/6] lock failure exit code --- README.md | 1 + cmd/add.go | 4 +++ cmd/init.go | 3 +- cmd/merge.go | 4 +++ cmd/navigate.go | 6 ++-- cmd/push.go | 3 +- cmd/rebase.go | 6 +++- cmd/sync.go | 3 ++ cmd/unstack.go | 5 +++ cmd/utils.go | 64 ++++++++++++++++++++++++++++++++-- cmd/view.go | 4 +++ internal/stack/lock.go | 35 ++++++++++++++----- internal/stack/lock_test.go | 47 +++++++++++++++++++++++++ internal/stack/lock_unix.go | 6 ++-- internal/stack/lock_windows.go | 13 +++---- skills/gh-stack/SKILL.md | 1 + 16 files changed, 178 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 9984ccd..103cdbd 100644 --- a/README.md +++ b/README.md @@ -467,3 +467,4 @@ Compared to the typical workflow, there's no need to name branches, run `git add | 5 | Invalid arguments or flags | | 6 | Disambiguation required (branch belongs to multiple stacks) | | 7 | Rebase already in progress | +| 8 | Stack is locked by another process | diff --git a/cmd/add.go b/cmd/add.go index d377440..9e89024 100644 --- a/cmd/add.go +++ b/cmd/add.go @@ -1,6 +1,7 @@ package cmd import ( + "errors" "fmt" "github.com/cli/go-gh/v2/pkg/prompter" @@ -51,6 +52,9 @@ func runAdd(cfg *config.Config, opts *addOptions, args []string) error { result, err := loadStack(cfg, "") if err != nil { + if errors.Is(err, ErrLockFailed) { + return ErrLockFailed + } return ErrNotInStack } defer result.Lock.Unlock() diff --git a/cmd/init.go b/cmd/init.go index 22138c7..6c2ef10 100644 --- a/cmd/init.go +++ b/cmd/init.go @@ -75,7 +75,8 @@ func runInit(cfg *config.Config, opts *initOptions) error { // Load existing stack file lock, err := stack.Lock(gitDir) if err != nil { - cfg.Warningf("could not acquire stack lock: %s", err) + cfg.Errorf("another process is currently editing the stack — try again later") + return ErrLockFailed } defer lock.Unlock() diff --git a/cmd/merge.go b/cmd/merge.go index 5d9667e..961a74c 100644 --- a/cmd/merge.go +++ b/cmd/merge.go @@ -1,6 +1,7 @@ package cmd import ( + "errors" "fmt" "github.com/cli/go-gh/v2/pkg/browser" @@ -35,6 +36,9 @@ func runMerge(cfg *config.Config, target string) error { // Standard stack loading and validation. result, err := loadStack(cfg, "") if err != nil { + if errors.Is(err, ErrLockFailed) { + return ErrLockFailed + } return ErrNotInStack } defer result.Lock.Unlock() diff --git a/cmd/navigate.go b/cmd/navigate.go index cb8168e..ad6080a 100644 --- a/cmd/navigate.go +++ b/cmd/navigate.go @@ -69,11 +69,10 @@ func BottomCmd(cfg *config.Config) *cobra.Command { } func runNavigate(cfg *config.Config, delta int) error { - result, err := loadStack(cfg, "") + result, err := loadStackReadOnly(cfg, "") if err != nil { return ErrNotInStack } - defer result.Lock.Unlock() s := result.Stack currentBranch := result.CurrentBranch @@ -184,11 +183,10 @@ func runNavigate(cfg *config.Config, delta int) error { } func runNavigateToEnd(cfg *config.Config, top bool) error { - result, err := loadStack(cfg, "") + result, err := loadStackReadOnly(cfg, "") if err != nil { return ErrNotInStack } - defer result.Lock.Unlock() s := result.Stack currentBranch := result.CurrentBranch diff --git a/cmd/push.go b/cmd/push.go index b85ccde..6fc8271 100644 --- a/cmd/push.go +++ b/cmd/push.go @@ -47,7 +47,8 @@ func runPush(cfg *config.Config, opts *pushOptions) error { lock, err := stack.Lock(gitDir) if err != nil { - cfg.Warningf("could not acquire stack lock: %s", err) + cfg.Errorf("another process is currently editing the stack — try again later") + return ErrLockFailed } defer lock.Unlock() diff --git a/cmd/rebase.go b/cmd/rebase.go index b7431af..755bb80 100644 --- a/cmd/rebase.go +++ b/cmd/rebase.go @@ -84,6 +84,9 @@ func runRebase(cfg *config.Config, opts *rebaseOptions) error { result, err := loadStack(cfg, opts.branch) if err != nil { + if errors.Is(err, ErrLockFailed) { + return ErrLockFailed + } return ErrNotInStack } defer result.Lock.Unlock() @@ -340,7 +343,8 @@ func continueRebase(cfg *config.Config, gitDir string) error { lock, err := stack.Lock(gitDir) if err != nil { - cfg.Warningf("could not acquire stack lock: %s", err) + cfg.Errorf("another process is currently editing the stack — try again later") + return ErrLockFailed } defer lock.Unlock() diff --git a/cmd/sync.go b/cmd/sync.go index b3e2f9b..9a5e15d 100644 --- a/cmd/sync.go +++ b/cmd/sync.go @@ -47,6 +47,9 @@ conflicts interactively.`, func runSync(cfg *config.Config, opts *syncOptions) error { result, err := loadStack(cfg, "") if err != nil { + if errors.Is(err, ErrLockFailed) { + return ErrLockFailed + } return ErrNotInStack } defer result.Lock.Unlock() diff --git a/cmd/unstack.go b/cmd/unstack.go index e9b2da5..b534ddd 100644 --- a/cmd/unstack.go +++ b/cmd/unstack.go @@ -1,6 +1,8 @@ package cmd import ( + "errors" + "github.com/github/gh-stack/internal/config" "github.com/github/gh-stack/internal/stack" "github.com/spf13/cobra" @@ -35,6 +37,9 @@ func UnstackCmd(cfg *config.Config) *cobra.Command { func runUnstack(cfg *config.Config, opts *unstackOptions) error { result, err := loadStack(cfg, opts.target) if err != nil { + if errors.Is(err, ErrLockFailed) { + return ErrLockFailed + } return ErrNotInStack } defer result.Lock.Unlock() diff --git a/cmd/utils.go b/cmd/utils.go index 305ba12..9c60e5b 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -26,6 +26,7 @@ var ( ErrInvalidArgs = &ExitError{Code: 5} // invalid arguments or flags ErrDisambiguate = &ExitError{Code: 6} // multiple stacks/remotes, can't auto-select ErrRebaseActive = &ExitError{Code: 7} // rebase already in progress + ErrLockFailed = &ExitError{Code: 8} // could not acquire stack file lock ) // ExitError is returned by commands to indicate a specific exit code. @@ -87,13 +88,13 @@ func loadStack(cfg *config.Config, branch string) (*loadStackResult, error) { lock, err := stack.Lock(gitDir) if err != nil { - cfg.Warningf("could not acquire stack lock: %s", err) - // Proceed without lock — better than failing entirely. + cfg.Errorf("another process is currently editing the stack — try again later") + return nil, ErrLockFailed } // Release the lock if we fail before returning it to the caller. success := false defer func() { - if !success && lock != nil { + if !success { lock.Unlock() } }() @@ -149,6 +150,63 @@ func loadStack(cfg *config.Config, branch string) (*loadStackResult, error) { }, nil } +// loadStackReadOnly loads the stack without acquiring an exclusive lock. +// Use this for commands that only read the stack and never call Save(). +func loadStackReadOnly(cfg *config.Config, branch string) (*loadStackResult, error) { + gitDir, err := git.GitDir() + if err != nil { + cfg.Errorf("not a git repository") + return nil, fmt.Errorf("not a git repository") + } + + sf, err := stack.Load(gitDir) + if err != nil { + cfg.Errorf("failed to load stack state: %s", err) + return nil, fmt.Errorf("failed to load stack state: %w", err) + } + + branchFromArg := branch != "" + if branch == "" { + branch, err = git.CurrentBranch() + if err != nil { + cfg.Errorf("failed to get current branch: %s", err) + return nil, fmt.Errorf("failed to get current branch: %w", err) + } + } + + s, err := resolveStack(sf, branch, cfg) + if err != nil { + if errors.Is(err, errInterrupt) { + return nil, errInterrupt + } + cfg.Errorf("%s", err) + return nil, err + } + if s == nil { + if branchFromArg { + cfg.Errorf("branch %q is not part of a stack", branch) + } else { + cfg.Errorf("current branch %q is not part of a stack", branch) + } + cfg.Printf("Checkout an existing stack using `%s` or create a new stack using `%s`", + cfg.ColorCyan("gh stack checkout"), cfg.ColorCyan("gh stack init")) + return nil, fmt.Errorf("branch %q is not part of a stack", branch) + } + + currentBranch, err := git.CurrentBranch() + if err != nil { + cfg.Errorf("failed to get current branch: %s", err) + return nil, fmt.Errorf("failed to get current branch: %w", err) + } + + return &loadStackResult{ + GitDir: gitDir, + StackFile: sf, + Stack: s, + CurrentBranch: currentBranch, + }, nil +} + // resolveStack finds the stack for the given branch, handling ambiguity when // a branch (typically a trunk) belongs to multiple stacks. If exactly one // stack matches, it is returned directly. If multiple stacks match, the user diff --git a/cmd/view.go b/cmd/view.go index e59e3de..d9a13fb 100644 --- a/cmd/view.go +++ b/cmd/view.go @@ -3,6 +3,7 @@ package cmd import ( "bytes" "encoding/json" + "errors" "fmt" "os" "os/exec" @@ -43,6 +44,9 @@ func ViewCmd(cfg *config.Config) *cobra.Command { func runView(cfg *config.Config, opts *viewOptions) error { result, err := loadStack(cfg, "") if err != nil { + if errors.Is(err, ErrLockFailed) { + return ErrLockFailed + } return ErrNotInStack } defer result.Lock.Unlock() diff --git a/internal/stack/lock.go b/internal/stack/lock.go index 3d9dc61..9e1d163 100644 --- a/internal/stack/lock.go +++ b/internal/stack/lock.go @@ -4,18 +4,27 @@ import ( "fmt" "os" "path/filepath" + "time" ) const lockFileName = "gh-stack.lock" +// LockTimeout is how long Lock() will wait for the exclusive lock before +// giving up. This prevents processes (including AI agents) from hanging +// indefinitely when another instance holds the lock. +const LockTimeout = 30 * time.Second + +// lockRetryInterval is the sleep between non-blocking lock attempts. +const lockRetryInterval = 100 * time.Millisecond + // FileLock provides an exclusive advisory lock on the stack file to prevent // concurrent Load-Modify-Save races between multiple gh-stack processes. type FileLock struct { - f *os.File - path string + f *os.File } // Lock acquires an exclusive lock on the stack file in the given git directory. +// It retries with a non-blocking attempt every 100ms for up to LockTimeout. // Callers must defer Unlock() to release the lock. // // Usage: @@ -33,20 +42,28 @@ func Lock(gitDir string) (*FileLock, error) { return nil, fmt.Errorf("opening lock file: %w", err) } - if err := lockFile(f); err != nil { - f.Close() - return nil, fmt.Errorf("acquiring lock: %w", err) + deadline := time.Now().Add(LockTimeout) + for { + err := tryLockFile(f) + if err == nil { + return &FileLock{f: f}, nil + } + if time.Now().After(deadline) { + f.Close() + return nil, fmt.Errorf("timed out waiting for stack lock after %s — another gh-stack process may be running", LockTimeout) + } + time.Sleep(lockRetryInterval) } - - return &FileLock{f: f, path: path}, nil } -// Unlock releases the lock and removes the lock file. +// Unlock releases the lock. The lock file is intentionally left on disk to +// avoid a race where another process opens the same path, blocks on flock, +// then wakes up holding a lock on an unlinked inode while a third process +// creates a new file and locks a different inode. func (l *FileLock) Unlock() { if l == nil || l.f == nil { return } unlockFile(l.f) l.f.Close() - os.Remove(l.path) } diff --git a/internal/stack/lock_test.go b/internal/stack/lock_test.go index 49e5ebd..aab70cb 100644 --- a/internal/stack/lock_test.go +++ b/internal/stack/lock_test.go @@ -3,6 +3,7 @@ package stack import ( "sync" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -24,6 +25,39 @@ func TestLock_NilUnlockSafe(t *testing.T) { lock.Unlock() } +func TestLock_BlocksUntilReleased(t *testing.T) { + dir := t.TempDir() + + lock1, err := Lock(dir) + require.NoError(t, err) + + acquired := make(chan struct{}) + go func() { + lock2, err := Lock(dir) + require.NoError(t, err) + close(acquired) + lock2.Unlock() + }() + + // lock2 should be blocked while lock1 is held. + select { + case <-acquired: + t.Fatal("lock2 acquired while lock1 was still held") + case <-time.After(300 * time.Millisecond): + // expected — lock2 is waiting + } + + lock1.Unlock() + + // After releasing lock1, lock2 should acquire promptly. + select { + case <-acquired: + // success + case <-time.After(5 * time.Second): + t.Fatal("lock2 did not acquire after lock1 was released") + } +} + func TestLock_SerializesConcurrentAccess(t *testing.T) { dir := t.TempDir() @@ -56,3 +90,16 @@ func TestLock_SerializesConcurrentAccess(t *testing.T) { require.NoError(t, err) assert.Len(t, final.Stacks, 10, "all concurrent writes should be preserved") } + +func TestLock_FileLeftOnDisk(t *testing.T) { + dir := t.TempDir() + + lock, err := Lock(dir) + require.NoError(t, err) + lock.Unlock() + + // Lock file should still exist after unlock (no os.Remove race). + lock2, err := Lock(dir) + require.NoError(t, err, "should be able to re-lock after unlock") + lock2.Unlock() +} diff --git a/internal/stack/lock_unix.go b/internal/stack/lock_unix.go index a4014b3..4da19da 100644 --- a/internal/stack/lock_unix.go +++ b/internal/stack/lock_unix.go @@ -7,8 +7,10 @@ import ( "syscall" ) -func lockFile(f *os.File) error { - return syscall.Flock(int(f.Fd()), syscall.LOCK_EX) +// tryLockFile attempts a non-blocking exclusive lock. +// Returns nil on success, or an error if the lock is held by another process. +func tryLockFile(f *os.File) error { + return syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB) } func unlockFile(f *os.File) { diff --git a/internal/stack/lock_windows.go b/internal/stack/lock_windows.go index efae1d1..92a2e07 100644 --- a/internal/stack/lock_windows.go +++ b/internal/stack/lock_windows.go @@ -8,15 +8,16 @@ import ( "golang.org/x/sys/windows" ) -func lockFile(f *os.File) error { - // Lock the first byte exclusively (blocks until available). +// tryLockFile attempts a non-blocking exclusive lock. +// Returns nil on success, or an error if the lock is held by another process. +func tryLockFile(f *os.File) error { ol := new(windows.Overlapped) return windows.LockFileEx( windows.Handle(f.Fd()), - windows.LOCKFILE_EXCLUSIVE_LOCK, - 0, // reserved - 1, // lock 1 byte - 0, // high word + windows.LOCKFILE_EXCLUSIVE_LOCK|windows.LOCKFILE_FAIL_IMMEDIATELY, + 0, // reserved + 1, // lock 1 byte + 0, // high word ol, ) } diff --git a/skills/gh-stack/SKILL.md b/skills/gh-stack/SKILL.md index 482b98b..38deb6d 100644 --- a/skills/gh-stack/SKILL.md +++ b/skills/gh-stack/SKILL.md @@ -732,6 +732,7 @@ gh stack unstack feature-auth --local | 5 | Invalid arguments | Fix the command invocation (check flags and arguments) | | 6 | Disambiguation required | A branch belongs to multiple stacks. Run `gh stack checkout ` to switch to a non-shared branch first | | 7 | Rebase already in progress | Run `gh stack rebase --continue` (after resolving conflicts) or `gh stack rebase --abort` to start over | +| 8 | Stack is locked | Another `gh stack` process is editing the stack file. Wait and retry — the lock times out after 30 seconds | ## Known limitations From 37e32a51378cfdf5a6a2b165d30896c81d06dbfd Mon Sep 17 00:00:00 2001 From: Sameen Karim Date: Wed, 25 Mar 2026 20:39:48 -0400 Subject: [PATCH 3/6] only lock for writes --- cmd/add.go | 8 +--- cmd/init.go | 9 +--- cmd/merge.go | 5 --- cmd/navigate.go | 4 +- cmd/push.go | 10 +---- cmd/rebase.go | 11 ----- cmd/sync.go | 7 +--- cmd/unstack.go | 9 +--- cmd/utils.go | 83 ++++++------------------------------- cmd/view.go | 7 +--- internal/stack/lock.go | 28 +++++++------ internal/stack/lock_test.go | 3 +- internal/stack/stack.go | 19 ++++++++- skills/gh-stack/SKILL.md | 2 +- 14 files changed, 58 insertions(+), 147 deletions(-) diff --git a/cmd/add.go b/cmd/add.go index 9e89024..8015c65 100644 --- a/cmd/add.go +++ b/cmd/add.go @@ -1,7 +1,6 @@ package cmd import ( - "errors" "fmt" "github.com/cli/go-gh/v2/pkg/prompter" @@ -52,12 +51,8 @@ func runAdd(cfg *config.Config, opts *addOptions, args []string) error { result, err := loadStack(cfg, "") if err != nil { - if errors.Is(err, ErrLockFailed) { - return ErrLockFailed - } return ErrNotInStack } - defer result.Lock.Unlock() gitDir := result.GitDir sf := result.StackFile s := result.Stack @@ -205,8 +200,7 @@ func runAdd(cfg *config.Config, opts *addOptions, args []string) error { } if err := stack.Save(gitDir, sf); err != nil { - cfg.Errorf("failed to save stack state: %s", err) - return ErrSilent + return handleSaveError(cfg, err) } // Print summary diff --git a/cmd/init.go b/cmd/init.go index 6c2ef10..7c002d2 100644 --- a/cmd/init.go +++ b/cmd/init.go @@ -73,13 +73,6 @@ func runInit(cfg *config.Config, opts *initOptions) error { } // Load existing stack file - lock, err := stack.Lock(gitDir) - if err != nil { - cfg.Errorf("another process is currently editing the stack — try again later") - return ErrLockFailed - } - defer lock.Unlock() - sf, err := stack.Load(gitDir) if err != nil { cfg.Errorf("failed to load stack state: %s", err) @@ -334,7 +327,7 @@ func runInit(cfg *config.Config, opts *initOptions) error { syncStackPRs(cfg, &sf.Stacks[len(sf.Stacks)-1]) if err := stack.Save(gitDir, sf); err != nil { - return err + return handleSaveError(cfg, err) } // Print result diff --git a/cmd/merge.go b/cmd/merge.go index 961a74c..74c452d 100644 --- a/cmd/merge.go +++ b/cmd/merge.go @@ -1,7 +1,6 @@ package cmd import ( - "errors" "fmt" "github.com/cli/go-gh/v2/pkg/browser" @@ -36,12 +35,8 @@ func runMerge(cfg *config.Config, target string) error { // Standard stack loading and validation. result, err := loadStack(cfg, "") if err != nil { - if errors.Is(err, ErrLockFailed) { - return ErrLockFailed - } return ErrNotInStack } - defer result.Lock.Unlock() s := result.Stack currentBranch := result.CurrentBranch diff --git a/cmd/navigate.go b/cmd/navigate.go index ad6080a..aad9c7b 100644 --- a/cmd/navigate.go +++ b/cmd/navigate.go @@ -69,7 +69,7 @@ func BottomCmd(cfg *config.Config) *cobra.Command { } func runNavigate(cfg *config.Config, delta int) error { - result, err := loadStackReadOnly(cfg, "") + result, err := loadStack(cfg, "") if err != nil { return ErrNotInStack } @@ -183,7 +183,7 @@ func runNavigate(cfg *config.Config, delta int) error { } func runNavigateToEnd(cfg *config.Config, top bool) error { - result, err := loadStackReadOnly(cfg, "") + result, err := loadStack(cfg, "") if err != nil { return ErrNotInStack } diff --git a/cmd/push.go b/cmd/push.go index 6fc8271..5e47f74 100644 --- a/cmd/push.go +++ b/cmd/push.go @@ -45,13 +45,6 @@ func runPush(cfg *config.Config, opts *pushOptions) error { return ErrNotInStack } - lock, err := stack.Lock(gitDir) - if err != nil { - cfg.Errorf("another process is currently editing the stack — try again later") - return ErrLockFailed - } - defer lock.Unlock() - sf, err := stack.Load(gitDir) if err != nil { cfg.Errorf("failed to load stack state: %s", err) @@ -187,8 +180,7 @@ func runPush(cfg *config.Config, opts *pushOptions) error { syncStackPRs(cfg, s) if err := stack.Save(gitDir, sf); err != nil { - cfg.Errorf("failed to save stack state: %s", err) - return ErrSilent + return handleSaveError(cfg, err) } cfg.Successf("Pushed and synced %d branches", len(s.ActiveBranches())) diff --git a/cmd/rebase.go b/cmd/rebase.go index 755bb80..406f78d 100644 --- a/cmd/rebase.go +++ b/cmd/rebase.go @@ -84,12 +84,8 @@ func runRebase(cfg *config.Config, opts *rebaseOptions) error { result, err := loadStack(cfg, opts.branch) if err != nil { - if errors.Is(err, ErrLockFailed) { - return ErrLockFailed - } return ErrNotInStack } - defer result.Lock.Unlock() sf := result.StackFile s := result.Stack currentBranch := result.CurrentBranch @@ -341,13 +337,6 @@ func continueRebase(cfg *config.Config, gitDir string) error { return ErrSilent } - lock, err := stack.Lock(gitDir) - if err != nil { - cfg.Errorf("another process is currently editing the stack — try again later") - return ErrLockFailed - } - defer lock.Unlock() - sf, err := stack.Load(gitDir) if err != nil { cfg.Errorf("failed to load stack state: %s", err) diff --git a/cmd/sync.go b/cmd/sync.go index 9a5e15d..edbc60f 100644 --- a/cmd/sync.go +++ b/cmd/sync.go @@ -47,12 +47,8 @@ conflicts interactively.`, func runSync(cfg *config.Config, opts *syncOptions) error { result, err := loadStack(cfg, "") if err != nil { - if errors.Is(err, ErrLockFailed) { - return ErrLockFailed - } return ErrNotInStack } - defer result.Lock.Unlock() gitDir := result.GitDir sf := result.StackFile s := result.Stack @@ -292,8 +288,7 @@ func runSync(cfg *config.Config, opts *syncOptions) error { updateBaseSHAs(s) if err := stack.Save(gitDir, sf); err != nil { - cfg.Errorf("failed to save stack state: %s", err) - return ErrSilent + return handleSaveError(cfg, err) } cfg.Printf("") diff --git a/cmd/unstack.go b/cmd/unstack.go index b534ddd..0be869c 100644 --- a/cmd/unstack.go +++ b/cmd/unstack.go @@ -1,8 +1,6 @@ package cmd import ( - "errors" - "github.com/github/gh-stack/internal/config" "github.com/github/gh-stack/internal/stack" "github.com/spf13/cobra" @@ -37,12 +35,8 @@ func UnstackCmd(cfg *config.Config) *cobra.Command { func runUnstack(cfg *config.Config, opts *unstackOptions) error { result, err := loadStack(cfg, opts.target) if err != nil { - if errors.Is(err, ErrLockFailed) { - return ErrLockFailed - } return ErrNotInStack } - defer result.Lock.Unlock() gitDir := result.GitDir sf := result.StackFile s := result.Stack @@ -56,8 +50,7 @@ func runUnstack(cfg *config.Config, opts *unstackOptions) error { // Remove from local tracking sf.RemoveStackForBranch(target) if err := stack.Save(gitDir, sf); err != nil { - cfg.Errorf("failed to save stack state: %s", err) - return ErrSilent + return handleSaveError(cfg, err) } cfg.Successf("Stack removed from local tracking") diff --git a/cmd/utils.go b/cmd/utils.go index 9c60e5b..13f4527 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -71,7 +71,6 @@ type loadStackResult struct { StackFile *stack.StackFile Stack *stack.Stack CurrentBranch string - Lock *stack.FileLock // caller must defer Lock.Unlock() if non-nil } // loadStack is the standard way to obtain a Stack for the current (or given) @@ -79,6 +78,9 @@ type loadStackResult struct { // branch, calls resolveStack (which may prompt for disambiguation), checks for // a nil stack, and re-reads the current branch (in case disambiguation caused // a checkout). Errors are printed via cfg and returned. +// +// loadStack does NOT acquire the stack file lock. The lock is acquired +// automatically by stack.Save() when writing. func loadStack(cfg *config.Config, branch string) (*loadStackResult, error) { gitDir, err := git.GitDir() if err != nil { @@ -86,19 +88,6 @@ func loadStack(cfg *config.Config, branch string) (*loadStackResult, error) { return nil, fmt.Errorf("not a git repository") } - lock, err := stack.Lock(gitDir) - if err != nil { - cfg.Errorf("another process is currently editing the stack — try again later") - return nil, ErrLockFailed - } - // Release the lock if we fail before returning it to the caller. - success := false - defer func() { - if !success { - lock.Unlock() - } - }() - sf, err := stack.Load(gitDir) if err != nil { cfg.Errorf("failed to load stack state: %s", err) @@ -140,71 +129,25 @@ func loadStack(cfg *config.Config, branch string) (*loadStackResult, error) { return nil, fmt.Errorf("failed to get current branch: %w", err) } - success = true return &loadStackResult{ GitDir: gitDir, StackFile: sf, Stack: s, CurrentBranch: currentBranch, - Lock: lock, }, nil } -// loadStackReadOnly loads the stack without acquiring an exclusive lock. -// Use this for commands that only read the stack and never call Save(). -func loadStackReadOnly(cfg *config.Config, branch string) (*loadStackResult, error) { - gitDir, err := git.GitDir() - if err != nil { - cfg.Errorf("not a git repository") - return nil, fmt.Errorf("not a git repository") - } - - sf, err := stack.Load(gitDir) - if err != nil { - cfg.Errorf("failed to load stack state: %s", err) - return nil, fmt.Errorf("failed to load stack state: %w", err) - } - - branchFromArg := branch != "" - if branch == "" { - branch, err = git.CurrentBranch() - if err != nil { - cfg.Errorf("failed to get current branch: %s", err) - return nil, fmt.Errorf("failed to get current branch: %w", err) - } - } - - s, err := resolveStack(sf, branch, cfg) - if err != nil { - if errors.Is(err, errInterrupt) { - return nil, errInterrupt - } - cfg.Errorf("%s", err) - return nil, err - } - if s == nil { - if branchFromArg { - cfg.Errorf("branch %q is not part of a stack", branch) - } else { - cfg.Errorf("current branch %q is not part of a stack", branch) - } - cfg.Printf("Checkout an existing stack using `%s` or create a new stack using `%s`", - cfg.ColorCyan("gh stack checkout"), cfg.ColorCyan("gh stack init")) - return nil, fmt.Errorf("branch %q is not part of a stack", branch) - } - - currentBranch, err := git.CurrentBranch() - if err != nil { - cfg.Errorf("failed to get current branch: %s", err) - return nil, fmt.Errorf("failed to get current branch: %w", err) +// handleSaveError translates a stack.Save error into the appropriate user +// message and exit error. Lock failures return ErrLockFailed (exit 8); +// other write failures return ErrSilent (exit 1). +func handleSaveError(cfg *config.Config, err error) error { + var lockErr *stack.LockError + if errors.As(err, &lockErr) { + cfg.Errorf("another process is currently editing the stack — try again later") + return ErrLockFailed } - - return &loadStackResult{ - GitDir: gitDir, - StackFile: sf, - Stack: s, - CurrentBranch: currentBranch, - }, nil + cfg.Errorf("failed to save stack state: %s", err) + return ErrSilent } // resolveStack finds the stack for the given branch, handling ambiguity when diff --git a/cmd/view.go b/cmd/view.go index d9a13fb..a37e572 100644 --- a/cmd/view.go +++ b/cmd/view.go @@ -3,7 +3,6 @@ package cmd import ( "bytes" "encoding/json" - "errors" "fmt" "os" "os/exec" @@ -44,18 +43,14 @@ func ViewCmd(cfg *config.Config) *cobra.Command { func runView(cfg *config.Config, opts *viewOptions) error { result, err := loadStack(cfg, "") if err != nil { - if errors.Is(err, ErrLockFailed) { - return ErrLockFailed - } return ErrNotInStack } - defer result.Lock.Unlock() gitDir := result.GitDir sf := result.StackFile s := result.Stack currentBranch := result.CurrentBranch - // Sync PR state + // Sync PR state and save (best-effort). syncStackPRs(cfg, s) _ = stack.Save(gitDir, sf) diff --git a/internal/stack/lock.go b/internal/stack/lock.go index 9e1d163..6e67311 100644 --- a/internal/stack/lock.go +++ b/internal/stack/lock.go @@ -9,10 +9,20 @@ import ( const lockFileName = "gh-stack.lock" +// LockError is returned when the stack file lock cannot be acquired. +// Callers can check for this with errors.As to distinguish lock failures +// from other errors. +type LockError struct { + Err error +} + +func (e *LockError) Error() string { return e.Err.Error() } +func (e *LockError) Unwrap() error { return e.Err } + // LockTimeout is how long Lock() will wait for the exclusive lock before -// giving up. This prevents processes (including AI agents) from hanging -// indefinitely when another instance holds the lock. -const LockTimeout = 30 * time.Second +// giving up. With the lock held only during file writes (milliseconds), +// this timeout primarily guards against stuck or orphaned lock files. +const LockTimeout = 5 * time.Second // lockRetryInterval is the sleep between non-blocking lock attempts. const lockRetryInterval = 100 * time.Millisecond @@ -25,16 +35,10 @@ type FileLock struct { // Lock acquires an exclusive lock on the stack file in the given git directory. // It retries with a non-blocking attempt every 100ms for up to LockTimeout. -// Callers must defer Unlock() to release the lock. -// -// Usage: // -// lock, err := stack.Lock(gitDir) -// if err != nil { ... } -// defer lock.Unlock() -// sf, err := stack.Load(gitDir) -// // ... modify sf ... -// stack.Save(gitDir, sf) +// Most callers should not use Lock directly — stack.Save() acquires the lock +// automatically. Use Lock only when you need to hold the lock across multiple +// operations (e.g. Load-Modify-Save as an atomic unit). func Lock(gitDir string) (*FileLock, error) { path := filepath.Join(gitDir, lockFileName) f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0644) diff --git a/internal/stack/lock_test.go b/internal/stack/lock_test.go index aab70cb..a59460f 100644 --- a/internal/stack/lock_test.go +++ b/internal/stack/lock_test.go @@ -66,6 +66,7 @@ func TestLock_SerializesConcurrentAccess(t *testing.T) { require.NoError(t, Save(dir, sf)) // Run 10 concurrent goroutines, each adding a stack under lock. + // Uses Lock + Load + SaveLocked for atomic read-modify-write. var wg sync.WaitGroup for i := 0; i < 10; i++ { wg.Add(1) @@ -80,7 +81,7 @@ func TestLock_SerializesConcurrentAccess(t *testing.T) { require.NoError(t, err) loaded.AddStack(makeStack("main", "branch")) - require.NoError(t, Save(dir, loaded)) + require.NoError(t, SaveLocked(dir, loaded)) }(i) } wg.Wait() diff --git a/internal/stack/stack.go b/internal/stack/stack.go index 4dca5a6..6841d5d 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -258,8 +258,25 @@ func Load(gitDir string) (*StackFile, error) { return &sf, nil } -// Save writes the stack file to the given git directory. +// Save acquires an exclusive lock on the stack file, writes sf as JSON, and +// releases the lock. The lock is held only for the duration of the write. func Save(gitDir string, sf *StackFile) error { + lock, err := Lock(gitDir) + if err != nil { + return &LockError{Err: err} + } + defer lock.Unlock() + return writeStackFile(gitDir, sf) +} + +// SaveLocked writes the stack file without acquiring the lock. The caller +// must already hold the lock (via Lock) to protect the write. Use this when +// you need an atomic Load-Modify-Save sequence. +func SaveLocked(gitDir string, sf *StackFile) error { + return writeStackFile(gitDir, sf) +} + +func writeStackFile(gitDir string, sf *StackFile) error { sf.SchemaVersion = schemaVersion if sf.Stacks == nil { sf.Stacks = []Stack{} diff --git a/skills/gh-stack/SKILL.md b/skills/gh-stack/SKILL.md index 38deb6d..4dd113a 100644 --- a/skills/gh-stack/SKILL.md +++ b/skills/gh-stack/SKILL.md @@ -732,7 +732,7 @@ gh stack unstack feature-auth --local | 5 | Invalid arguments | Fix the command invocation (check flags and arguments) | | 6 | Disambiguation required | A branch belongs to multiple stacks. Run `gh stack checkout ` to switch to a non-shared branch first | | 7 | Rebase already in progress | Run `gh stack rebase --continue` (after resolving conflicts) or `gh stack rebase --abort` to start over | -| 8 | Stack is locked | Another `gh stack` process is editing the stack file. Wait and retry — the lock times out after 30 seconds | +| 8 | Stack is locked | Another `gh stack` process is writing the stack file. Wait and retry — the lock times out after 5 seconds | ## Known limitations From 629ea06d707109c555372d0180bf7d1f06f42d92 Mon Sep 17 00:00:00 2001 From: Sameen Karim Date: Wed, 25 Mar 2026 21:21:26 -0400 Subject: [PATCH 4/6] non-blocking saves for non-critical writes --- cmd/merge.go | 2 +- cmd/rebase.go | 4 ++-- cmd/sync.go | 2 +- cmd/utils.go | 2 +- cmd/view.go | 2 +- internal/stack/lock.go | 8 +++++++- internal/stack/lock_test.go | 31 +++++++++++++++++++++++++++---- internal/stack/lock_unix.go | 5 +++++ internal/stack/lock_windows.go | 6 ++++++ internal/stack/stack.go | 21 ++++++++++++++++++++- 10 files changed, 71 insertions(+), 12 deletions(-) diff --git a/cmd/merge.go b/cmd/merge.go index 74c452d..ce5bf8e 100644 --- a/cmd/merge.go +++ b/cmd/merge.go @@ -44,7 +44,7 @@ func runMerge(cfg *config.Config, target string) error { syncStackPRs(cfg, s) // Persist the refreshed PR state. - _ = stack.Save(result.GitDir, result.StackFile) + stack.SaveNonBlocking(result.GitDir, result.StackFile) // Resolve which branch to operate on. var br *stack.BranchRef diff --git a/cmd/rebase.go b/cmd/rebase.go index 406f78d..b9f8d52 100644 --- a/cmd/rebase.go +++ b/cmd/rebase.go @@ -305,7 +305,7 @@ func runRebase(cfg *config.Config, opts *rebaseOptions) error { syncStackPRs(cfg, s) - _ = stack.Save(gitDir, sf) + stack.SaveNonBlocking(gitDir, sf) merged := s.MergedBranches() if len(merged) > 0 { @@ -496,7 +496,7 @@ func continueRebase(cfg *config.Config, gitDir string) error { syncStackPRs(cfg, s) - _ = stack.Save(gitDir, sf) + stack.SaveNonBlocking(gitDir, sf) cfg.Printf("All branches in stack rebased locally with %s", s.Trunk.Branch) cfg.Printf("To push up your changes and open/update the stack of PRs, run `%s`", diff --git a/cmd/sync.go b/cmd/sync.go index edbc60f..52c8184 100644 --- a/cmd/sync.go +++ b/cmd/sync.go @@ -221,7 +221,7 @@ func runSync(cfg *config.Config, opts *syncOptions) error { } else { // Persist refreshed PR state even on conflict, then bail out // before pushing or reporting success. - _ = stack.Save(gitDir, sf) + stack.SaveNonBlocking(gitDir, sf) return ErrConflict } } diff --git a/cmd/utils.go b/cmd/utils.go index 13f4527..afa5429 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -138,7 +138,7 @@ func loadStack(cfg *config.Config, branch string) (*loadStackResult, error) { } // handleSaveError translates a stack.Save error into the appropriate user -// message and exit error. Lock failures return ErrLockFailed (exit 8); +// message and exit error. Lock contention returns ErrLockFailed (exit 8); // other write failures return ErrSilent (exit 1). func handleSaveError(cfg *config.Config, err error) error { var lockErr *stack.LockError diff --git a/cmd/view.go b/cmd/view.go index a37e572..fb4cc47 100644 --- a/cmd/view.go +++ b/cmd/view.go @@ -52,7 +52,7 @@ func runView(cfg *config.Config, opts *viewOptions) error { // Sync PR state and save (best-effort). syncStackPRs(cfg, s) - _ = stack.Save(gitDir, sf) + stack.SaveNonBlocking(gitDir, sf) if opts.asJSON { return viewJSON(cfg, s, currentBranch) diff --git a/internal/stack/lock.go b/internal/stack/lock.go index 6e67311..e6393fe 100644 --- a/internal/stack/lock.go +++ b/internal/stack/lock.go @@ -52,9 +52,15 @@ func Lock(gitDir string) (*FileLock, error) { if err == nil { return &FileLock{f: f}, nil } + if !isLockBusy(err) { + // Unexpected error (e.g. bad fd) — don't retry. + f.Close() + return nil, fmt.Errorf("locking stack file: %w", err) + } if time.Now().After(deadline) { f.Close() - return nil, fmt.Errorf("timed out waiting for stack lock after %s — another gh-stack process may be running", LockTimeout) + return nil, &LockError{Err: fmt.Errorf( + "timed out waiting for stack lock after %s — another gh-stack process may be running", LockTimeout)} } time.Sleep(lockRetryInterval) } diff --git a/internal/stack/lock_test.go b/internal/stack/lock_test.go index a59460f..3a40ca8 100644 --- a/internal/stack/lock_test.go +++ b/internal/stack/lock_test.go @@ -1,6 +1,7 @@ package stack import ( + "fmt" "sync" "testing" "time" @@ -32,9 +33,13 @@ func TestLock_BlocksUntilReleased(t *testing.T) { require.NoError(t, err) acquired := make(chan struct{}) + errCh := make(chan error, 1) go func() { lock2, err := Lock(dir) - require.NoError(t, err) + if err != nil { + errCh <- err + return + } close(acquired) lock2.Unlock() }() @@ -43,6 +48,8 @@ func TestLock_BlocksUntilReleased(t *testing.T) { select { case <-acquired: t.Fatal("lock2 acquired while lock1 was still held") + case err := <-errCh: + t.Fatalf("lock2 failed: %v", err) case <-time.After(300 * time.Millisecond): // expected — lock2 is waiting } @@ -53,6 +60,8 @@ func TestLock_BlocksUntilReleased(t *testing.T) { select { case <-acquired: // success + case err := <-errCh: + t.Fatalf("lock2 failed after lock1 released: %v", err) case <-time.After(5 * time.Second): t.Fatal("lock2 did not acquire after lock1 was released") } @@ -67,6 +76,7 @@ func TestLock_SerializesConcurrentAccess(t *testing.T) { // Run 10 concurrent goroutines, each adding a stack under lock. // Uses Lock + Load + SaveLocked for atomic read-modify-write. + errCh := make(chan error, 10) var wg sync.WaitGroup for i := 0; i < 10; i++ { wg.Add(1) @@ -74,17 +84,30 @@ func TestLock_SerializesConcurrentAccess(t *testing.T) { defer wg.Done() lock, err := Lock(dir) - require.NoError(t, err) + if err != nil { + errCh <- fmt.Errorf("goroutine %d Lock: %w", idx, err) + return + } defer lock.Unlock() loaded, err := Load(dir) - require.NoError(t, err) + if err != nil { + errCh <- fmt.Errorf("goroutine %d Load: %w", idx, err) + return + } loaded.AddStack(makeStack("main", "branch")) - require.NoError(t, SaveLocked(dir, loaded)) + if err := SaveLocked(dir, loaded); err != nil { + errCh <- fmt.Errorf("goroutine %d SaveLocked: %w", idx, err) + } }(i) } wg.Wait() + close(errCh) + + for err := range errCh { + t.Error(err) + } // All 10 stacks should be present — no lost updates. final, err := Load(dir) diff --git a/internal/stack/lock_unix.go b/internal/stack/lock_unix.go index 4da19da..c157961 100644 --- a/internal/stack/lock_unix.go +++ b/internal/stack/lock_unix.go @@ -13,6 +13,11 @@ func tryLockFile(f *os.File) error { return syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB) } +// isLockBusy reports whether err indicates the lock is held by another process. +func isLockBusy(err error) bool { + return err == syscall.EWOULDBLOCK +} + func unlockFile(f *os.File) { _ = syscall.Flock(int(f.Fd()), syscall.LOCK_UN) } diff --git a/internal/stack/lock_windows.go b/internal/stack/lock_windows.go index 92a2e07..0b7d033 100644 --- a/internal/stack/lock_windows.go +++ b/internal/stack/lock_windows.go @@ -3,6 +3,7 @@ package stack import ( + "errors" "os" "golang.org/x/sys/windows" @@ -22,6 +23,11 @@ func tryLockFile(f *os.File) error { ) } +// isLockBusy reports whether err indicates the lock is held by another process. +func isLockBusy(err error) bool { + return errors.Is(err, windows.ERROR_LOCK_VIOLATION) +} + func unlockFile(f *os.File) { ol := new(windows.Overlapped) _ = windows.UnlockFileEx( diff --git a/internal/stack/stack.go b/internal/stack/stack.go index 6841d5d..2e808da 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -260,15 +260,34 @@ func Load(gitDir string) (*StackFile, error) { // Save acquires an exclusive lock on the stack file, writes sf as JSON, and // releases the lock. The lock is held only for the duration of the write. +// Returns *LockError if the lock times out due to contention. func Save(gitDir string, sf *StackFile) error { lock, err := Lock(gitDir) if err != nil { - return &LockError{Err: err} + return err // *LockError for contention, plain error for I/O failures } defer lock.Unlock() return writeStackFile(gitDir, sf) } +// SaveNonBlocking attempts to save without blocking. If another process holds +// the lock, the save is silently skipped. Use this for best-effort metadata +// persistence (e.g. syncing PR state in view). +func SaveNonBlocking(gitDir string, sf *StackFile) { + path := filepath.Join(gitDir, lockFileName) + f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0644) + if err != nil { + return + } + if tryLockFile(f) != nil { + f.Close() + return + } + lock := &FileLock{f: f} + defer lock.Unlock() + _ = writeStackFile(gitDir, sf) +} + // SaveLocked writes the stack file without acquiring the lock. The caller // must already hold the lock (via Lock) to protect the write. Use this when // you need an atomic Load-Modify-Save sequence. From d5bb33e4894245855fa8b1ad76397f29183f3a89 Mon Sep 17 00:00:00 2001 From: Sameen Karim Date: Wed, 25 Mar 2026 21:57:22 -0400 Subject: [PATCH 5/6] additional lock file test --- internal/stack/lock.go | 4 ++-- internal/stack/lock_test.go | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/internal/stack/lock.go b/internal/stack/lock.go index e6393fe..b80cf2a 100644 --- a/internal/stack/lock.go +++ b/internal/stack/lock.go @@ -21,14 +21,14 @@ func (e *LockError) Unwrap() error { return e.Err } // LockTimeout is how long Lock() will wait for the exclusive lock before // giving up. With the lock held only during file writes (milliseconds), -// this timeout primarily guards against stuck or orphaned lock files. +// this timeout primarily guards against a hung process holding the lock. const LockTimeout = 5 * time.Second // lockRetryInterval is the sleep between non-blocking lock attempts. const lockRetryInterval = 100 * time.Millisecond // FileLock provides an exclusive advisory lock on the stack file to prevent -// concurrent Load-Modify-Save races between multiple gh-stack processes. +// concurrent writes between multiple gh-stack processes. type FileLock struct { f *os.File } diff --git a/internal/stack/lock_test.go b/internal/stack/lock_test.go index 3a40ca8..72e894d 100644 --- a/internal/stack/lock_test.go +++ b/internal/stack/lock_test.go @@ -2,6 +2,8 @@ package stack import ( "fmt" + "os" + "path/filepath" "sync" "testing" "time" @@ -123,6 +125,9 @@ func TestLock_FileLeftOnDisk(t *testing.T) { lock.Unlock() // Lock file should still exist after unlock (no os.Remove race). + _, err = os.Stat(filepath.Join(dir, lockFileName)) + require.NoError(t, err, "lock file should remain on disk after unlock") + lock2, err := Lock(dir) require.NoError(t, err, "should be able to re-lock after unlock") lock2.Unlock() From fc4528baa873d574becc0e4cb4ae74aa8b6c8b51 Mon Sep 17 00:00:00 2001 From: Sameen Karim Date: Thu, 26 Mar 2026 21:35:03 -0400 Subject: [PATCH 6/6] protect against time of check vs use race condition --- cmd/utils.go | 9 ++- internal/stack/lock.go | 12 +++- internal/stack/lock_test.go | 116 +++++++++++++++++++++++++++++++++++- internal/stack/stack.go | 74 +++++++++++++++++++---- 4 files changed, 195 insertions(+), 16 deletions(-) diff --git a/cmd/utils.go b/cmd/utils.go index afa5429..815b8a8 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -138,14 +138,19 @@ func loadStack(cfg *config.Config, branch string) (*loadStackResult, error) { } // handleSaveError translates a stack.Save error into the appropriate user -// message and exit error. Lock contention returns ErrLockFailed (exit 8); -// other write failures return ErrSilent (exit 1). +// message and exit error. Lock contention and stale-file detection both +// return ErrLockFailed (exit 8); other write failures return ErrSilent (exit 1). func handleSaveError(cfg *config.Config, err error) error { var lockErr *stack.LockError if errors.As(err, &lockErr) { cfg.Errorf("another process is currently editing the stack — try again later") return ErrLockFailed } + var staleErr *stack.StaleError + if errors.As(err, &staleErr) { + cfg.Errorf("stack file was modified by another process — please re-run the command") + return ErrLockFailed + } cfg.Errorf("failed to save stack state: %s", err) return ErrSilent } diff --git a/internal/stack/lock.go b/internal/stack/lock.go index b80cf2a..02a9dd9 100644 --- a/internal/stack/lock.go +++ b/internal/stack/lock.go @@ -19,10 +19,20 @@ type LockError struct { func (e *LockError) Error() string { return e.Err.Error() } func (e *LockError) Unwrap() error { return e.Err } +// StaleError is returned when the stack file was modified on disk since it +// was loaded. This indicates another process wrote to the file concurrently. +// Callers can check for this with errors.As. +type StaleError struct { + Err error +} + +func (e *StaleError) Error() string { return e.Err.Error() } +func (e *StaleError) Unwrap() error { return e.Err } + // LockTimeout is how long Lock() will wait for the exclusive lock before // giving up. With the lock held only during file writes (milliseconds), // this timeout primarily guards against a hung process holding the lock. -const LockTimeout = 5 * time.Second +var LockTimeout = 5 * time.Second // lockRetryInterval is the sleep between non-blocking lock attempts. const lockRetryInterval = 100 * time.Millisecond diff --git a/internal/stack/lock_test.go b/internal/stack/lock_test.go index 72e894d..6afefc6 100644 --- a/internal/stack/lock_test.go +++ b/internal/stack/lock_test.go @@ -1,6 +1,7 @@ package stack import ( + "errors" "fmt" "os" "path/filepath" @@ -77,7 +78,7 @@ func TestLock_SerializesConcurrentAccess(t *testing.T) { require.NoError(t, Save(dir, sf)) // Run 10 concurrent goroutines, each adding a stack under lock. - // Uses Lock + Load + SaveLocked for atomic read-modify-write. + // Uses Lock + Load + writeStackFile for atomic read-modify-write. errCh := make(chan error, 10) var wg sync.WaitGroup for i := 0; i < 10; i++ { @@ -99,8 +100,8 @@ func TestLock_SerializesConcurrentAccess(t *testing.T) { } loaded.AddStack(makeStack("main", "branch")) - if err := SaveLocked(dir, loaded); err != nil { - errCh <- fmt.Errorf("goroutine %d SaveLocked: %w", idx, err) + if err := writeStackFile(dir, loaded); err != nil { + errCh <- fmt.Errorf("goroutine %d writeStackFile: %w", idx, err) } }(i) } @@ -132,3 +133,112 @@ func TestLock_FileLeftOnDisk(t *testing.T) { require.NoError(t, err, "should be able to re-lock after unlock") lock2.Unlock() } + +func TestLock_TimesOut(t *testing.T) { + dir := t.TempDir() + + // Hold the lock so the second attempt can never acquire it. + lock1, err := Lock(dir) + require.NoError(t, err) + defer lock1.Unlock() + + // Save original timeout and set a short one for the test. + origTimeout := LockTimeout + LockTimeout = 200 * time.Millisecond + defer func() { LockTimeout = origTimeout }() + + start := time.Now() + lock2, err := Lock(dir) + elapsed := time.Since(start) + + assert.Nil(t, lock2, "should not acquire lock") + require.Error(t, err) + + var lockErr *LockError + require.True(t, errors.As(err, &lockErr), "error should be *LockError, got %T", err) + assert.Contains(t, lockErr.Error(), "timed out") + + // Should have waited roughly LockTimeout before giving up. + assert.GreaterOrEqual(t, elapsed, 150*time.Millisecond, "should wait near the timeout") +} + +func TestSave_DetectsStaleFile(t *testing.T) { + dir := t.TempDir() + + // Write an initial stack file. + sf := &StackFile{SchemaVersion: 1, Stacks: []Stack{}} + require.NoError(t, Save(dir, sf)) + + // Load — captures the on-disk checksum. + loaded, err := Load(dir) + require.NoError(t, err) + + // Simulate another process: load, modify, save. + other, err := Load(dir) + require.NoError(t, err) + other.AddStack(makeStack("main", "sneaky")) + require.NoError(t, Save(dir, other)) + + // Our loaded copy tries to save — should detect staleness. + loaded.AddStack(makeStack("main", "my-branch")) + err = Save(dir, loaded) + require.Error(t, err) + + var staleErr *StaleError + require.True(t, errors.As(err, &staleErr), "error should be *StaleError, got %T", err) + assert.Contains(t, staleErr.Error(), "modified by another process") +} + +func TestSave_AllowsWriteWhenFileUnchanged(t *testing.T) { + dir := t.TempDir() + + // Write, load, modify, save — no concurrent changes. + sf := &StackFile{SchemaVersion: 1, Stacks: []Stack{}} + require.NoError(t, Save(dir, sf)) + + loaded, err := Load(dir) + require.NoError(t, err) + + loaded.AddStack(makeStack("main", "feature")) + require.NoError(t, Save(dir, loaded)) + + // Verify the write actually persisted. + final, err := Load(dir) + require.NoError(t, err) + assert.Len(t, final.Stacks, 1) +} + +func TestSave_AllowsFirstWrite(t *testing.T) { + dir := t.TempDir() + + // File doesn't exist — Load returns nil checksum, Save should succeed. + sf, err := Load(dir) + require.NoError(t, err) + assert.Empty(t, sf.Stacks) + + sf.AddStack(makeStack("main", "first")) + require.NoError(t, Save(dir, sf), "first save to a new file should succeed") + + final, err := Load(dir) + require.NoError(t, err) + assert.Len(t, final.Stacks, 1) +} + +func TestSave_DoubleSaveSucceeds(t *testing.T) { + dir := t.TempDir() + + sf, err := Load(dir) + require.NoError(t, err) + + sf.AddStack(makeStack("main", "first")) + require.NoError(t, Save(dir, sf), "first save should succeed") + + // A second Save on the same instance must not spuriously fail — + // writeStackFile refreshes loadChecksum after writing. + sf.AddStack(makeStack("main", "second")) + require.NoError(t, Save(dir, sf), "second save on same instance should succeed") + + final, err := Load(dir) + require.NoError(t, err) + assert.Len(t, final.Stacks, 2) +} diff --git a/internal/stack/stack.go b/internal/stack/stack.go index 2e808da..5405c7a 100644 --- a/internal/stack/stack.go +++ b/internal/stack/stack.go @@ -1,6 +1,8 @@ package stack import ( + "bytes" + "crypto/sha256" "encoding/json" "errors" "fmt" @@ -168,6 +170,11 @@ type StackFile struct { SchemaVersion int `json:"schemaVersion"` Repository string `json:"repository"` Stacks []Stack `json:"stacks"` + + // loadChecksum is the SHA-256 of the raw file bytes at Load time. + // Save uses it to detect concurrent modifications (optimistic concurrency). + // nil means the file did not exist when loaded. + loadChecksum []byte } // FindAllStacksForBranch returns all stacks that contain the given branch. @@ -233,11 +240,14 @@ func stackFilePath(gitDir string) string { // Load reads the stack file from the given git directory. // Returns an empty StackFile if the file does not exist. +// The returned StackFile records a checksum of the on-disk content so that +// Save can detect concurrent modifications. func Load(gitDir string) (*StackFile, error) { path := stackFilePath(gitDir) data, err := os.ReadFile(path) if err != nil { if errors.Is(err, os.ErrNotExist) { + // loadChecksum stays nil — sentinel for "file absent at load time". return &StackFile{ SchemaVersion: schemaVersion, Stacks: []Stack{}, @@ -255,24 +265,32 @@ func Load(gitDir string) (*StackFile, error) { return nil, fmt.Errorf("stack file has schema version %d, but this version of gh-stack only supports up to version %d — please upgrade gh-stack", sf.SchemaVersion, schemaVersion) } + sum := sha256.Sum256(data) + sf.loadChecksum = sum[:] return &sf, nil } -// Save acquires an exclusive lock on the stack file, writes sf as JSON, and -// releases the lock. The lock is held only for the duration of the write. -// Returns *LockError if the lock times out due to contention. +// Save acquires an exclusive lock on the stack file, verifies the file hasn't +// been modified since Load (optimistic concurrency), writes sf as JSON, and +// releases the lock. The lock is held only for the read-compare-write window. +// Returns *LockError if the lock times out, or *StaleError if another process +// modified the file since it was loaded. func Save(gitDir string, sf *StackFile) error { lock, err := Lock(gitDir) if err != nil { return err // *LockError for contention, plain error for I/O failures } defer lock.Unlock() + + if err := checkStale(gitDir, sf); err != nil { + return err + } return writeStackFile(gitDir, sf) } // SaveNonBlocking attempts to save without blocking. If another process holds -// the lock, the save is silently skipped. Use this for best-effort metadata -// persistence (e.g. syncing PR state in view). +// the lock or the file was modified since Load, the save is silently skipped. +// Use this for best-effort metadata persistence (e.g. syncing PR state in view). func SaveNonBlocking(gitDir string, sf *StackFile) { path := filepath.Join(gitDir, lockFileName) f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0644) @@ -285,14 +303,46 @@ func SaveNonBlocking(gitDir string, sf *StackFile) { } lock := &FileLock{f: f} defer lock.Unlock() + + if checkStale(gitDir, sf) != nil { + return + } _ = writeStackFile(gitDir, sf) } -// SaveLocked writes the stack file without acquiring the lock. The caller -// must already hold the lock (via Lock) to protect the write. Use this when -// you need an atomic Load-Modify-Save sequence. -func SaveLocked(gitDir string, sf *StackFile) error { - return writeStackFile(gitDir, sf) +// checkStale compares the current on-disk content against the checksum +// captured at Load time. Returns *StaleError if the file was modified +// by another process. The caller must hold the lock. +func checkStale(gitDir string, sf *StackFile) error { + path := stackFilePath(gitDir) + data, err := os.ReadFile(path) + + if errors.Is(err, os.ErrNotExist) { + // File absent on disk. + if sf.loadChecksum == nil { + return nil // was absent at Load time too — no conflict + } + // File existed at Load but is now gone. Allow the write to + // recreate it rather than erroring; this is not a lost-update. + return nil + } + if err != nil { + return fmt.Errorf("reading stack file for staleness check: %w", err) + } + + // File exists on disk. + if sf.loadChecksum == nil { + // File was absent at Load but another process created it. + return &StaleError{Err: fmt.Errorf( + "stack file was created by another process since it was loaded")} + } + + sum := sha256.Sum256(data) + if !bytes.Equal(sf.loadChecksum, sum[:]) { + return &StaleError{Err: fmt.Errorf( + "stack file was modified by another process since it was loaded")} + } + return nil } func writeStackFile(gitDir string, sf *StackFile) error { @@ -308,5 +358,9 @@ func writeStackFile(gitDir string, sf *StackFile) error { if err := os.WriteFile(path, data, 0644); err != nil { return fmt.Errorf("writing stack file: %w", err) } + // Refresh checksum so a second Save on the same StackFile doesn't + // spuriously fail the staleness check. + sum := sha256.Sum256(data) + sf.loadChecksum = sum[:] return nil }