Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix race condition between stop/cancel and register #971

Merged
merged 1 commit into from Apr 2, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
38 changes: 20 additions & 18 deletions agent/agent_worker.go
Expand Up @@ -101,6 +101,7 @@ func NewAgentWorker(l logger.Logger, a *api.AgentRegisterResponse, m *metrics.Co
apiClient: apiClient,
debug: c.Debug,
agentConfiguration: c.AgentConfiguration,
stop: make(chan struct{}),
}
}

Expand All @@ -123,27 +124,30 @@ func (a *AgentWorker) Start() error {
pingInterval := time.Second * time.Duration(a.agent.PingInterval)
heartbeatInterval := time.Second * time.Duration(a.agent.HeartbeatInterval)

// Create the ticker
a.ticker = time.NewTicker(pingInterval)

// Setup and start the heartbeater
go func() {
// Keep the heartbeat running as long as the agent is running
for a.running {
err := a.Heartbeat()
if err != nil {
// Get the last heartbeat time to the nearest microsecond
lastHeartbeat := time.Unix(atomic.LoadInt64(&a.lastPing), 0)

a.logger.Error("Failed to heartbeat %s. Will try again in %s. (Last successful was %v ago)",
err, heartbeatInterval, time.Now().Sub(lastHeartbeat))
}
for {
select {
case <-time.After(heartbeatInterval):
err := a.Heartbeat()
if err != nil {
// Get the last heartbeat time to the nearest microsecond
lastHeartbeat := time.Unix(atomic.LoadInt64(&a.lastPing), 0)

a.logger.Error("Failed to heartbeat %s. Will try again in %s. (Last successful was %v ago)",
err, heartbeatInterval, time.Now().Sub(lastHeartbeat))
}

time.Sleep(heartbeatInterval)
case <-a.stop:
a.logger.Debug("Stopping heartbeats")
return
}
}
}()

// Create the ticker and stop channels
a.ticker = time.NewTicker(pingInterval)
a.stop = make(chan struct{})

// Setup a timer to automatically disconnect if no job has started
if a.agentConfiguration.DisconnectAfterJob {
a.disconnectTimeoutTimer = time.NewTimer(time.Second * time.Duration(a.agentConfiguration.DisconnectAfterJobTimeout))
Expand Down Expand Up @@ -250,9 +254,7 @@ func (a *AgentWorker) Stop(graceful bool) {

// If we have a ticker, stop it, and send a signal to the stop channel,
// which will cause the agent worker to stop looping immediately.
if a.ticker != nil {
close(a.stop)
}
close(a.stop)

// Mark the agent as stopping
a.stopping = true
Expand Down