Skip to content

Commit

Permalink
ipcache: Fix deadlock when ipcache GC results in datapath reload
Browse files Browse the repository at this point in the history
[ upstream commit 3285cbc ]

The following deadlock can occur when the ipcache GC relies on map
renames and datapath reloads to delete entries in combination with
endpoint regenrations triggered by the FQDN proxy which perform ipcache
upserts as part of regenerations:

Routine 1:
The following go routine holds the ipcache mutex while garbage collecting:

```
goroutine 779 [semacquire, 48 minutes]:
sync.runtime_SemacquireMutex(0xc0008f4f68, 0xc007663600, 0x0)
        /usr/local/go/src/runtime/sema.go:71 +0x47
sync.(*RWMutex).Lock(0xc0008f4f60)
        /usr/local/go/src/sync/rwmutex.go:103 +0x88
github.com/cilium/cilium/pkg/datapath/loader.(*Loader).Reinitialize(0xc0003d70a0, 0x2759d20, 0xc000468bc0, 0x2771e60, 0xc000515680, 0x2329, 0x7f87768eac80, 0xc00022ff10, 0x26faf60, 0xc0004a18b0, ...)
        /go/src/github.com/cilium/cilium/pkg/datapath/loader/base.go:124 +0xcd
main.(*Daemon).TriggerReloadWithoutCompile(0xc000515680, 0x2332316, 0x10, 0x12, 0xc008f75060, 0x16)
        /go/src/github.com/cilium/cilium/daemon/daemon.go:588 +0x202
github.com/cilium/cilium/pkg/datapath/ipcache.(*BPFListener).garbageCollect(0xc0006f8960, 0x2759d20, 0xc003748000, 0x0, 0x0, 0x0)
        /go/src/github.com/cilium/cilium/pkg/datapath/ipcache/listener.go:306 +0xadc
github.com/cilium/cilium/pkg/datapath/ipcache.(*BPFListener).OnIPIdentityCacheGC.func1(0x2759d20, 0xc003748000, 0x3a5bf00, 0x2)
        /go/src/github.com/cilium/cilium/pkg/datapath/ipcache/listener.go:340 +0x3e
github.com/cilium/cilium/pkg/controller.(*Controller).runController(0xc001e1e000)
        /go/src/github.com/cilium/cilium/pkg/controller/controller.go:205 +0xa2a
created by github.com/cilium/cilium/pkg/controller.(*Manager).updateController
        /go/src/github.com/cilium/cilium/pkg/controller/manager.go:120 +0xb09
```

As part of this, Reinitialize() is called which will require the compilation mutex to be acquired.

Routine 2
The following ongoing endpoint regeneration is holding the compilation lock and thus blocks Routine 1 from completing. It is itself blocked on the FQDN NameManager mutex.

```
goroutine 7227352 [semacquire, 48 minutes]:
sync.runtime_SemacquireMutex(0xc0005c3744, 0xc009bf4c00, 0x1)
        /usr/local/go/src/runtime/sema.go:71 +0x47
sync.(*Mutex).lockSlow(0xc0005c3740)
        /usr/local/go/src/sync/mutex.go:138 +0xfc
sync.(*Mutex).Lock(...)
        /usr/local/go/src/sync/mutex.go:81
github.com/cilium/cilium/pkg/fqdn.(*NameManager).Lock(0xc0005c3740)
        /go/src/github.com/cilium/cilium/pkg/fqdn/name_manager.go:93 +0x47
github.com/cilium/cilium/pkg/policy.(*SelectorCache).AddFQDNSelector(0xc0004691c0, 0x26faf00, 0xc00acd6e70, 0x0, 0x0, 0xc004106590, 0xb, 0x0, 0x0, 0x40ca00)
        /go/src/github.com/cilium/cilium/pkg/policy/selectorcache.go:580 +0x13f
github.com/cilium/cilium/pkg/policy.(*L4Filter).cacheFQDNSelector(...)
        /go/src/github.com/cilium/cilium/pkg/policy/l4.go:392
github.com/cilium/cilium/pkg/policy.(*L4Filter).cacheFQDNSelectors(0xc00acd6e70, 0xc008c7d100, 0x38, 0x38, 0xc0004691c0)
        /go/src/github.com/cilium/cilium/pkg/policy/l4.go:387 +0xad
github.com/cilium/cilium/pkg/policy.createL4Filter(0x2739660, 0xc001c49680, 0xc006beab60, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, ...)
        /go/src/github.com/cilium/cilium/pkg/policy/l4.go:489 +0x7ce
github.com/cilium/cilium/pkg/policy.createL4EgressFilter(...)
        /go/src/github.com/cilium/cilium/pkg/policy/l4.go:599
github.com/cilium/cilium/pkg/policy.mergeEgressPortProto(0x2739660, 0xc001c49680, 0xc0052087d0, 0xc006beab60, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, ...)
        /go/src/github.com/cilium/cilium/pkg/policy/rule.go:637 +0x187
github.com/cilium/cilium/pkg/policy.mergeEgress(0x2739660, 0xc001c49680, 0xc0052087d0, 0xc006beab60, 0x1, 0x1, 0x0, 0x0, 0x0, 0xc00b91f2c0, ...)
        /go/src/github.com/cilium/cilium/pkg/policy/rule.go:572 +0xdbd
github.com/cilium/cilium/pkg/policy.(*rule).resolveEgressPolicy(0xc0079a0400, 0x2739660, 0xc001c49680, 0xc0052087d0, 0xc005208418, 0xc0090738f0, 0x0, 0x0, 0x0, 0xc0090738f0, ...)
        /go/src/github.com/cilium/cilium/pkg/policy/rule.go:675 +0x265
github.com/cilium/cilium/pkg/policy.ruleSlice.resolveL4EgressPolicy(0xc008d0a9c0, 0x5, 0x8, 0x2739660, 0xc001c49680, 0xc0052087d0, 0xc008d6c300, 0x0, 0x0)
        /go/src/github.com/cilium/cilium/pkg/policy/rules.go:106 +0x70b
github.com/cilium/cilium/pkg/policy.(*Repository).resolvePolicyLocked(0xc000246460, 0xc0059fe820, 0xc0000066c3, 0xc0030b0960, 0xc006176201)
        /go/src/github.com/cilium/cilium/pkg/policy/repository.go:665 +0x3f9
github.com/cilium/cilium/pkg/policy.(*PolicyCache).updateSelectorPolicy(0xc000342b20, 0xc0059fe820, 0x3a5bf00, 0x0, 0x0)
        /go/src/github.com/cilium/cilium/pkg/policy/distillery.go:135 +0x148
github.com/cilium/cilium/pkg/policy.(*PolicyCache).UpdatePolicy(...)
        /go/src/github.com/cilium/cilium/pkg/policy/distillery.go:170
github.com/cilium/cilium/pkg/endpoint.(*Endpoint).regeneratePolicy(0xc001427080, 0x0, 0x0)
        /go/src/github.com/cilium/cilium/pkg/endpoint/policy.go:175 +0x255
github.com/cilium/cilium/pkg/endpoint.(*Endpoint).runPreCompilationSteps(0xc001427080, 0xc005d3a500, 0xc008d0a800, 0x0, 0x0)
        /go/src/github.com/cilium/cilium/pkg/endpoint/bpf.go:736 +0x96a
github.com/cilium/cilium/pkg/endpoint.(*Endpoint).regenerateBPF(0xc001427080, 0xc005d3a500, 0x0, 0x0, 0x0, 0x0)
        /go/src/github.com/cilium/cilium/pkg/endpoint/bpf.go:508 +0x1e0
github.com/cilium/cilium/pkg/endpoint.(*Endpoint).regenerate(0xc001427080, 0xc005d3a500, 0x0, 0x0)
        /go/src/github.com/cilium/cilium/pkg/endpoint/policy.go:349 +0x841
github.com/cilium/cilium/pkg/endpoint.(*EndpointRegenerationEvent).Handle(0xc003530210, 0xc0068115c0)
        /go/src/github.com/cilium/cilium/pkg/endpoint/events.go:58 +0x459
github.com/cilium/cilium/pkg/eventqueue.(*EventQueue).Run.func1()
        /go/src/github.com/cilium/cilium/pkg/eventqueue/eventqueue.go:260 +0x187
sync.(*Once).doSlow(0xc005f60e58, 0xc009bbb330)
        /usr/local/go/src/sync/once.go:66 +0xe3
sync.(*Once).Do(0xc005f60e58, 0xc009bbb330)
        /usr/local/go/src/sync/once.go:57 +0x45
created by github.com/cilium/cilium/pkg/eventqueue.(*EventQueue).Run
        /go/src/github.com/cilium/cilium/pkg/eventqueue/eventqueue.go:248 +0xa9
```

Routine 3

This third routine is holding the FQDN NameManager mutex while itself waiting on the ipcache mutex which is held by routine 1.

```
goroutine 7229843 [select, 48 minutes]:
golang.org/x/sync/semaphore.(*Weighted).Acquire(0xc000464230, 0x2759d60, 0xc000058018, 0x40000000, 0xc0008f9740, 0x0)
        /go/src/github.com/cilium/cilium/vendor/golang.org/x/sync/semaphore/semaphore.go:60 +0x28e
github.com/cilium/cilium/pkg/lock.(*SemaphoredMutex).Lock(...)
        /go/src/github.com/cilium/cilium/pkg/lock/semaphored_mutex.go:41
github.com/cilium/cilium/pkg/ipcache.(*IPCache).Upsert(0xc000464280, 0xc008858dd0, 0xd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1000596, 0x231acf1, ...)
        /go/src/github.com/cilium/cilium/pkg/ipcache/ipcache.go:184 +0x93
github.com/cilium/cilium/pkg/ipcache.allocateCIDRs(0xc004bb2a00, 0x98, 0x98, 0x0, 0x0, 0x0, 0x0, 0x0)
        /go/src/github.com/cilium/cilium/pkg/ipcache/cidr.go:97 +0x7aa
github.com/cilium/cilium/pkg/ipcache.AllocateCIDRsForIPs(0xc0046a2000, 0x98, 0x200, 0x1, 0x1, 0x0, 0x1f4a440, 0xc00a768900)
        /go/src/github.com/cilium/cilium/pkg/ipcache/cidr.go:52 +0x65
main.identitiesForFQDNSelectorIPs(0xc00a7687e0, 0xc0008f9740, 0x0, 0x0, 0x0)
        /go/src/github.com/cilium/cilium/daemon/fqdn.go:87 +0x38e
main.(*Daemon).updateSelectors(0xc000515680, 0x2759da0, 0xc000e09a40, 0xc00a7687e0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)
        /go/src/github.com/cilium/cilium/daemon/fqdn.go:320 +0x40
github.com/cilium/cilium/pkg/fqdn.(*NameManager).UpdateGenerateDNS(0xc0005c3740, 0x2759da0, 0xc000e09a40, 0xbfae6eb0118dc6cd, 0x131b26c90d12, 0x3a5bf00, 0xc004cf9910, 0x0, 0x0, 0x0)
        /go/src/github.com/cilium/cilium/pkg/fqdn/name_manager.go:225 +0x48f
main.(*Daemon).notifyOnDNSMsg(0xc000515680, 0xbfae6eb0118dc6cd, 0x131b26c90d12, 0x3a5bf00, 0xc001427080, 0xc00a2bdec0, 0x11, 0xc00ae52340, 0x10, 0xc003564f30, ...)
        /go/src/github.com/cilium/cilium/daemon/fqdn.go:584 +0x12cd
github.com/cilium/cilium/pkg/fqdn/dnsproxy.(*DNSProxy).ServeDNS(0xc0008c8e80, 0x27818e0, 0xc00302e1e0, 0xc0072a4f30)
        /go/src/github.com/cilium/cilium/pkg/fqdn/dnsproxy/proxy.go:475 +0x238c
github.com/miekg/dns.(*Server).serveDNS(0xc000563a40, 0xc001f8be00, 0x6b, 0x200, 0xc00302e1e0)
        /go/src/github.com/cilium/cilium/vendor/github.com/miekg/dns/server.go:597 +0x20b
github.com/miekg/dns.(*Server).serveUDPPacket(0xc000563a40, 0xc00047d060, 0xc001f8be00, 0x6b, 0x200, 0xc000df0380, 0xc0092cf2b0)
        /go/src/github.com/cilium/cilium/vendor/github.com/miekg/dns/server.go:552 +0xb2
github.com/miekg/dns.(*Server).serveUDP.func2(0xc000563a40, 0xc00047d060, 0xc001f8be00, 0x6b, 0x200, 0xc000df0380, 0xc0092cf2b0)
        /go/src/github.com/cilium/cilium/vendor/github.com/miekg/dns/server.go:478 +0x67
created by github.com/miekg/dns.(*Server).serveUDP
        /go/src/github.com/cilium/cilium/vendor/github.com/miekg/dns/server.go:477 +0x272
```

Fixes: #11946

Signed-off-by: Thomas Graf <thomas@cilium.io>
Signed-off-by: Joe Stringer <joe@cilium.io>
  • Loading branch information
tgraf authored and joestringer committed Jun 12, 2020
1 parent 4e4c9d7 commit ad7a5c0
Showing 1 changed file with 33 additions and 16 deletions.
49 changes: 33 additions & 16 deletions pkg/datapath/ipcache/listener.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,13 +212,13 @@ func shuffleMaps(realized, backup, pending string) error {
func (l *BPFListener) garbageCollect(ctx context.Context) (*sync.WaitGroup, error) {
log.Debug("Running garbage collection for BPF IPCache")

// Since controllers run asynchronously, need to make sure
// IPIdentityCache is not being updated concurrently while we do
// GC;
ipcache.IPIdentityCache.RLock()
defer ipcache.IPIdentityCache.RUnlock()

if ipcacheMap.SupportsDelete() {
// Since controllers run asynchronously, need to make sure
// IPIdentityCache is not being updated concurrently while we
// do GC;
ipcache.IPIdentityCache.RLock()
defer ipcache.IPIdentityCache.RUnlock()

keysToRemove := map[string]*ipcacheMap.Key{}
if err := l.bpfMap.DumpWithCallback(updateStaleEntriesFunction(keysToRemove)); err != nil {
return nil, fmt.Errorf("error dumping ipcache BPF map: %s", err)
Expand All @@ -234,10 +234,16 @@ func (l *BPFListener) garbageCollect(ctx context.Context) (*sync.WaitGroup, erro
}
}
} else {
// Since controllers run asynchronously, need to make sure
// IPIdentityCache is not being updated concurrently while we
// do GC;
ipcache.IPIdentityCache.RLock()

// Populate the map at the new path
pendingMapName := fmt.Sprintf("%s_pending", ipcacheMap.Name)
pendingMap := ipcacheMap.NewMap(pendingMapName)
if _, err := pendingMap.OpenOrCreate(); err != nil {
ipcache.IPIdentityCache.RUnlock()
return nil, fmt.Errorf("Unable to create %s map: %s", pendingMapName, err)
}
pendingListener := newListener(pendingMap, l.datapath)
Expand All @@ -251,24 +257,35 @@ func (l *BPFListener) garbageCollect(ctx context.Context) (*sync.WaitGroup, erro
// will pick up the new paths without requiring recompilation.
backupMapName := fmt.Sprintf("%s_old", ipcacheMap.Name)
if err := shuffleMaps(ipcacheMap.Name, backupMapName, pendingMapName); err != nil {
ipcache.IPIdentityCache.RUnlock()
return nil, err
}

wg, err := l.datapath.TriggerReloadWithoutCompile("datapath ipcache")
if err != nil {
// Reopen the ipcache map so that new writes and reads will use
// the new map
if err := ipcacheMap.Reopen(); err != nil {
handleMapShuffleFailure(backupMapName, ipcacheMap.Name)
ipcache.IPIdentityCache.RUnlock()
return nil, err
}

// If the base programs successfully compiled, then the maps
// should be OK so let's update all references to the IPCache
// so that they point to the new version.
_ = os.RemoveAll(bpf.MapPath(backupMapName))
if err := ipcacheMap.Reopen(); err != nil {
// Very unlikely; base program compilation succeeded.
log.WithError(err).Warning("Failed to reopen BPF ipcache map")
return nil, err
// Unlock the ipcache as in order for
// TriggerReloadWithoutCompile() to succeed, other endpoint
// regenerations which are blocking on the ipcache lock may
// need to succeed first (#11946)
ipcache.IPIdentityCache.RUnlock()

wg, err := l.datapath.TriggerReloadWithoutCompile("datapath ipcache")
if err != nil {
// We can't really undo the map rename again as ipcache
// operations had already been permitted so the backup
// map is potentially outdated. Fail hard to restart
// the agent so we reconstruct the ipcache from
// scratch.
log.WithError(err).Fatal("Endpoint datapath reload triggered by ipcache GC failed. Inconsistent state.")
}

_ = os.RemoveAll(bpf.MapPath(backupMapName))
return wg, nil
}
return nil, nil
Expand Down

0 comments on commit ad7a5c0

Please sign in to comment.