Skip to content

Commit

Permalink
daemon: Speed up initial synchronization with container runtime
Browse files Browse the repository at this point in the history
The current behaviour was to walk all local containers currently running
and check if they are supposed to be managed by Cilium. The function to
do so was shared with the logic used when, e.g., Docker sends an event
that a container has been created. This logic assumes that another event
is received later on which requests to handle networking for this
container.

While this logic makes sense when handling container create events, it
is very unlikely that this event will be received for an already running
container unless the container has just been started. Thus, the handler
would always time out waiting for the event. The daemon startup was
delayed until this happened.

This commit changes behaviour to:
 - No longer wait for the network manage event to be received on the
   initial container sync and on the regular sync. The first means that
   we simply ignore local containers if no network event was received.
   For the regular sync every N seconds, do not wait either as we
   retry anyway later on.
 - Continue waiting in the goroutine when handling actual container
   create events.

Fixes: #259

Signed-off-by:  <thomas@cilium.io>
  • Loading branch information
tgraf committed Mar 18, 2017
1 parent 3ac1140 commit 145cbdc
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 17 deletions.
39 changes: 27 additions & 12 deletions daemon/docker_watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ func (d *Daemon) EnableDockerEventListener(since time.Time) error {
return nil
}

func (d *Daemon) SyncDocker(wg *sync.WaitGroup) {
func (d *Daemon) SyncDocker() {
var wg sync.WaitGroup

cList, err := d.dockerClient.ContainerList(ctx.Background(), dTypes.ContainerListOptions{All: false})
if err != nil {
log.Errorf("Failed to retrieve the container list %s", err)
Expand All @@ -68,21 +70,30 @@ func (d *Daemon) SyncDocker(wg *sync.WaitGroup) {

wg.Add(1)
go func(wg *sync.WaitGroup, id string) {
d.handleCreateContainer(id)
d.handleCreateContainer(id, false)
wg.Done()
}(wg, cont.ID)
}(&wg, cont.ID)
}

// Wait for all spawned go routines handling container creations to exit
wg.Wait()
}

func (d *Daemon) EnableDockerSync() {
var wg sync.WaitGroup
func (d *Daemon) backgroundContainerSync() {
for {
d.SyncDocker(&wg)
wg.Wait()
d.SyncDocker()
time.Sleep(syncRateDocker)
}
}

// RunBackgroundContainerSync spawns a go routine which periodically
// synchronizes containers managed by the local container runtime and
// checks if any of them need to be managed by Cilium. This is a fall
// back mechanism in case an event notification has been lost.
func (d *Daemon) RunBackgroundContainerSync() {
go d.backgroundContainerSync()
}

func (d *Daemon) listenForDockerEvents(reader io.ReadCloser) {
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
Expand All @@ -104,7 +115,7 @@ func (d *Daemon) processEvent(m dTypesEvents.Message) {
case "start":
// A real event overwrites any memory of ignored containers
d.StopIgnoringContainer(m.ID)
d.handleCreateContainer(m.ID)
d.handleCreateContainer(m.ID, true)
case "die":
d.deleteContainer(m.ID)
}
Expand Down Expand Up @@ -200,16 +211,20 @@ func createContainer(dc *dTypes.ContainerJSON, l labels.Labels) types.Container
}
}

func (d *Daemon) handleCreateContainer(id string) {
func (d *Daemon) handleCreateContainer(id string, retry bool) {
log.Debugf("Processing create event for docker container %s", id)

maxTries := 5

for try := 1; try <= maxTries; try++ {
if try > 1 {
log.Debugf("Waiting for container %s to appear as endpoint [%d/%d]",
id, try, maxTries)
time.Sleep(time.Duration(try) * time.Second)
if retry {
log.Debugf("Waiting for container %s to appear as endpoint [%d/%d]",
id, try, maxTries)
time.Sleep(time.Duration(try) * time.Second)
} else {
return
}
}

dockerContainer, lbls, err := d.retrieveDockerLabels(id)
Expand Down
7 changes: 2 additions & 5 deletions daemon/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"os"
"path"
"strings"
"sync"
"time"

"github.com/cilium/cilium/api/v1/server"
Expand Down Expand Up @@ -270,10 +269,8 @@ func runDaemon() {

d.EnableMonitor()

var wg sync.WaitGroup
sinceLastSync := time.Now()
d.SyncDocker(&wg)
wg.Wait()
d.SyncDocker()

// Register event listener in docker endpoint
if err := d.EnableDockerEventListener(sinceLastSync); err != nil {
Expand All @@ -286,7 +283,7 @@ func runDaemon() {
log.Warningf("Error while enabling k8s watcher %s", err)
}

go d.EnableDockerSync()
d.RunBackgroundContainerSync()

swaggerSpec, err := loads.Analyzed(server.SwaggerJSON, "")
if err != nil {
Expand Down

0 comments on commit 145cbdc

Please sign in to comment.