From 5e59ddaf31c54736c1b3e9d2134123733de5bdea Mon Sep 17 00:00:00 2001 From: Adrian Serrano Date: Fri, 5 Jun 2020 17:45:00 +0200 Subject: [PATCH] Windows: fix service termination (#18916) Update the Windows service handling logic so that the service doesn't transition to the STOPPED state until the beater is terminated. Before this patch, a Beats service would report itself as STOPPED as soon as it received the stop request. This caused problems during service restarts, as the new service would start while the old one was still cleaning up. Fixes #18914 (cherry picked from commit f3ab7c78a92ac0e11feb537c92a5587ac465d1c1) --- CHANGELOG.next.asciidoc | 1 + libbeat/cmd/instance/beat.go | 6 ++++++ libbeat/service/service.go | 5 +++++ libbeat/service/service_unix.go | 3 +++ libbeat/service/service_windows.go | 27 ++++++++++++++++++++++++--- 5 files changed, 39 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index c3d2e047075..28020a1f385 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -70,6 +70,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Gives monitoring reporter hosts, if configured, total precedence over corresponding output hosts. {issue}17937[17937] {pull}17991[17991] - Fix `keystore add` hanging under Windows. {issue}18649[18649] {pull}18654[18654] - Fix regression in `add_kubernetes_metadata`, so configured `indexers` and `matchers` are used if defaults are not disabled. {issue}18481[18481] {pull}18818[18818] +- Fixed a service restart failure under Windows. {issue}18914[18914] {pull}18916[18916] *Auditbeat* diff --git a/libbeat/cmd/instance/beat.go b/libbeat/cmd/instance/beat.go index 10367a4dd29..d340aefe6e1 100644 --- a/libbeat/cmd/instance/beat.go +++ b/libbeat/cmd/instance/beat.go @@ -371,6 +371,12 @@ func (b *Beat) launch(settings Settings, bt beat.Creator) error { return err } + // Windows: Mark service as stopped. 
+ // After this is run, a Beat service is considered by the OS to be stopped + // and another instance of the process can be started. + // This must be the first deferred cleanup task (last to execute). + defer svc.NotifyTermination() + // Try to acquire exclusive lock on data path to prevent another beat instance // sharing same data path. bl := newLocker(b) diff --git a/libbeat/service/service.go b/libbeat/service/service.go index ec6e0fca672..4c56cfc28a2 100644 --- a/libbeat/service/service.go +++ b/libbeat/service/service.go @@ -67,6 +67,11 @@ func HandleSignals(stopFunction func(), cancel context.CancelFunc) { }) } +// NotifyTermination tells the OS that the service is stopped. +func NotifyTermination() { + notifyWindowsServiceStopped() +} + // cmdline flags var memprofile, cpuprofile, httpprof *string var cpuOut *os.File diff --git a/libbeat/service/service_unix.go b/libbeat/service/service_unix.go index 7c6bfb4d08a..7d20b04620e 100644 --- a/libbeat/service/service_unix.go +++ b/libbeat/service/service_unix.go @@ -22,3 +22,6 @@ package service // ProcessWindowsControlEvents is not used on non-windows platforms. func ProcessWindowsControlEvents(stopCallback func()) { } + +func notifyWindowsServiceStopped() { +} diff --git a/libbeat/service/service_windows.go b/libbeat/service/service_windows.go index 649bf85cfa8..a81f4fb5a0f 100644 --- a/libbeat/service/service_windows.go +++ b/libbeat/service/service_windows.go @@ -28,7 +28,15 @@ import ( "github.com/elastic/beats/v7/libbeat/logp" ) -type beatService struct{} +type beatService struct { + stopCallback func() + done chan struct{} +} + +var serviceInstance = &beatService{ + stopCallback: nil, + done: make(chan struct{}, 0), +} // Execute runs the beat service with the arguments and manages changes that // occur in the environment or runtime that may affect the beat. 
@@ -52,9 +60,22 @@ loop: } } changes <- svc.Status{State: svc.StopPending} + m.stopCallback() + // Block until notifyWindowsServiceStopped below is called. This is required + // as the windows/svc package will transition the service to STOPPED state + // once this function returns. + <-m.done return } +func (m *beatService) stop() { + close(m.done) +} + +func notifyWindowsServiceStopped() { + serviceInstance.stop() +} + // couldNotConnect is the errno for ERROR_FAILED_SERVICE_CONTROLLER_CONNECT. const couldNotConnect syscall.Errno = 1063 @@ -76,10 +97,10 @@ func ProcessWindowsControlEvents(stopCallback func()) { run = debug.Run } - err = run(os.Args[0], &beatService{}) + serviceInstance.stopCallback = stopCallback + err = run(os.Args[0], serviceInstance) if err == nil { - stopCallback() return }