Skip to content
Permalink
Browse files

grpclb: fix deadlock in grpclb connection cache (#3017)

Before the fix, if the timer to remove a SubConn fires at the same time
NewSubConn cancels the timer, it caused a mutex leak and deadlock.
  • Loading branch information...
menghanl committed Sep 10, 2019
1 parent 4ccf24a commit ac35b67779b9802189d5c0bcfc25d84ce7ba36a1
Showing with 44 additions and 1 deletion.
  1. +1 −1 balancer/grpclb/grpclb_util.go
  2. +43 −0 balancer/grpclb/grpclb_util_test.go
@@ -173,13 +173,13 @@ func (ccc *lbCacheClientConn) RemoveSubConn(sc balancer.SubConn) {

timer := time.AfterFunc(ccc.timeout, func() {
ccc.mu.Lock()
defer ccc.mu.Unlock()
if entry.abortDeleting {
return
}
ccc.cc.RemoveSubConn(sc)
delete(ccc.subConnToAddr, sc)
delete(ccc.subConnCache, addr)
ccc.mu.Unlock()
})
entry.cancel = func() {
if !timer.Stop() {
@@ -217,3 +217,46 @@ func TestLBCacheClientConnReuse(t *testing.T) {
t.Fatal(err)
}
}

// Test that if the timer to remove a SubConn fires at the same time NewSubConn
// cancels the timer, it doesn't cause deadlock.
func TestLBCache_RemoveTimer_New_Race(t *testing.T) {
mcc := newMockClientConn()
if err := checkMockCC(mcc, 0); err != nil {
t.Fatal(err)
}

ccc := newLBCacheClientConn(mcc)
ccc.timeout = time.Nanosecond
if err := checkCacheCC(ccc, 0, 0); err != nil {
t.Fatal(err)
}

sc, _ := ccc.NewSubConn([]resolver.Address{{Addr: "address1"}}, balancer.NewSubConnOptions{})
// One subconn in MockCC.
if err := checkMockCC(mcc, 1); err != nil {
t.Fatal(err)
}
// No subconn being deleted, and one in CacheCC.
if err := checkCacheCC(ccc, 0, 1); err != nil {
t.Fatal(err)
}

done := make(chan struct{})

go func() {
for i := 0; i < 1000; i++ {
// Remove starts a timer with 1 ns timeout, the NewSubConn will race
// with with the timer.
ccc.RemoveSubConn(sc)
sc, _ = ccc.NewSubConn([]resolver.Address{{Addr: "address1"}}, balancer.NewSubConnOptions{})
}
close(done)
}()

select {
case <-time.After(time.Second):
t.Fatalf("Test didn't finish within 1 second. Deadlock")
case <-done:
}
}

0 comments on commit ac35b67

Please sign in to comment.
You can’t perform that action at this time.