From 086289675d5bd62b2b956d84e8cde6821d0d9c16 Mon Sep 17 00:00:00 2001 From: Anders Pearson Date: Tue, 26 Sep 2017 13:22:48 +0100 Subject: [PATCH 1/7] graceful shutdown on SIGTERM --- hound.go | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/hound.go b/hound.go index f2299b6..8bc4575 100644 --- a/hound.go +++ b/hound.go @@ -1,6 +1,7 @@ package main // import "github.com/ccnmtl/hound" import ( + "context" "encoding/json" "expvar" "flag" @@ -8,7 +9,10 @@ import ( "html/template" "io/ioutil" "net/http" + "os" + "os/signal" "strings" + "syscall" "time" log "github.com/Sirupsen/logrus" @@ -187,5 +191,22 @@ func main() { ReadTimeout: time.Duration(c.ReadTimeout) * time.Second, WriteTimeout: time.Duration(c.WriteTimeout) * time.Second, } - log.Fatal(s.ListenAndServe()) + go func() { + log.Fatal(s.ListenAndServe()) + }() + stop := make(chan os.Signal, 1) + signal.Notify(stop, os.Interrupt, syscall.SIGTERM) + + <-stop + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + if err = s.Shutdown(ctx); err != nil { + log.WithFields( + log.Fields{ + "error": fmt.Sprintf("%v", err), + }).Fatal("graceful shutdown failed") + } else { + log.Info("successful graceful shutdown") + } } From dc1f7ec165b061b9db9cfcf7ac36ade3ec18526a Mon Sep 17 00:00:00 2001 From: Anders Pearson Date: Tue, 26 Sep 2017 13:44:57 +0100 Subject: [PATCH 2/7] upgrade docker image to Go 1.8 `centurylink/ca-certs` is stuck on 1.5 and hasn't been updated in two years :( graceful shutdown of the http server requires Go 1.8 or higher. This switches the base image to `golang:1.8`. The resulting image is a bit larger, but should otherwise work the same. --- Dockerfile | 14 ++++++++------ Makefile | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4e6285a..7eaa488 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,12 @@ -FROM centurylink/ca-certs -COPY hound / -COPY index.html / -COPY alert.html / +FROM golang:1.8 +WORKDIR /go/src/app +COPY . . +RUN go-wrapper install + ENV HOUND_HTTP_PORT=9998 -ENV HOUND_TEMPLATE_FILE=/index.html +ENV HOUND_TEMPLATE_FILE=/go/src/app/index.html ENV HOUND_SMTP_SERVER=postfix ENV HOUND_SMTP_PORT=25 EXPOSE 9998 -CMD ["/hound", "-config=/config.json"] +CMD ["go-wrapper", "run", "-config", "/config.json"] + diff --git a/Makefile b/Makefile index cb84a32..381e219 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ coverage.html: coverage.out go tool cover -html=coverage.out -o coverage.html build: - docker run --rm -v $(ROOT_DIR):/src -v /var/run/docker.sock:/var/run/docker.sock centurylink/golang-builder ccnmtl/hound + docker build -t ccnmtl/hound . push: build docker push ccnmtl/hound From 8df8d0249a163baee6d33ea6afd25074575c739b Mon Sep 17 00:00:00 2001 From: Anders Pearson Date: Tue, 26 Sep 2017 15:26:35 +0100 Subject: [PATCH 3/7] explicitly shutdown the polling --- alertscollection.go | 13 +++++++++---- hound.go | 21 ++++++++++++++++----- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/alertscollection.go b/alertscollection.go index f5598a4..0022557 100644 --- a/alertscollection.go +++ b/alertscollection.go @@ -2,6 +2,7 @@ package main import ( "bytes" + "context" "fmt" "net" "time" @@ -123,11 +124,15 @@ func intmin(a, b int) int { return b } -func (ac *alertsCollection) Run() { +func (ac *alertsCollection) Run(ctx context.Context) { for { - ac.processAll() - ac.DisplayAll() - time.Sleep(time.Duration(checkInterval) * time.Minute) + select { + case <-ctx.Done(): + return + case <-time.After(time.Duration(checkInterval) * time.Minute): + ac.processAll() + ac.DisplayAll() + } } } diff --git a/hound.go b/hound.go index 8bc4575..f8cf9bf 100644 --- a/hound.go +++ b/hound.go @@ -154,9 +154,6 @@ func main() { ac.addAlert(newAlert(a.Name, a.Metric, a.Type, a.Threshold, a.Direction, httpFetcher{}, emailTo, a.RunBookLink)) } - // kick it off in the background - go ac.Run() - http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { pr := ac.MakePageResponse() @@ -191,14 +188,28 @@ func main() { ReadTimeout: time.Duration(c.ReadTimeout) * time.Second, WriteTimeout: time.Duration(c.WriteTimeout) * time.Second, } + + bgcontext := context.Background() + alertsctx, alertscancel := context.WithCancel(bgcontext) + + // kick off alerts in the background + go ac.Run(alertsctx) + + // and the http server in the background go func() { log.Fatal(s.ListenAndServe()) }() + + // wait for a SIGTERM stop := make(chan os.Signal, 1) signal.Notify(stop, os.Interrupt, syscall.SIGTERM) - <-stop - ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + + // then gracefully shut everything down. + alertscancel() + + // giving the http server 1 second to close its connections + ctx, cancel := context.WithTimeout(bgcontext, 1*time.Second) defer cancel() if err = s.Shutdown(ctx); err != nil { From 8470e40996132a4509bb998a4cca056ea5bbbec7 Mon Sep 17 00:00:00 2001 From: Anders Pearson Date: Tue, 26 Sep 2017 15:39:36 +0100 Subject: [PATCH 4/7] extract startServices() function --- hound.go | 50 ++++++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/hound.go b/hound.go index f8cf9bf..84b1ecd 100644 --- a/hound.go +++ b/hound.go @@ -144,6 +144,32 @@ func main() { } }() + bgcontext := context.Background() + s, alertscancel := startServices(bgcontext, f, c) + + // wait for a SIGTERM + stop := make(chan os.Signal, 1) + signal.Notify(stop, os.Interrupt, syscall.SIGTERM) + <-stop + + // then gracefully shut everything down. + alertscancel() + + // giving the http server 1 second to close its connections + ctx, cancel := context.WithTimeout(bgcontext, 1*time.Second) + defer cancel() + + if err = s.Shutdown(ctx); err != nil { + log.WithFields( + log.Fields{ + "error": fmt.Sprintf("%v", err), + }).Fatal("graceful shutdown failed") + } else { + log.Info("successful graceful shutdown") + } +} + +func startServices(ctx context.Context, f configData, c config) (*http.Server, context.CancelFunc) { // initialize all the alerts ac := newAlertsCollection(smtpEmailer{}) for _, a := range f.Alerts { @@ -189,8 +215,7 @@ func main() { WriteTimeout: time.Duration(c.WriteTimeout) * time.Second, } - bgcontext := context.Background() - alertsctx, alertscancel := context.WithCancel(bgcontext) + alertsctx, alertscancel := context.WithCancel(ctx) // kick off alerts in the background go ac.Run(alertsctx) @@ -200,24 +225,5 @@ func main() { log.Fatal(s.ListenAndServe()) }() - // wait for a SIGTERM - stop := make(chan os.Signal, 1) - signal.Notify(stop, os.Interrupt, syscall.SIGTERM) - <-stop - - // then gracefully shut everything down. - alertscancel() - - // giving the http server 1 second to close its connections - ctx, cancel := context.WithTimeout(bgcontext, 1*time.Second) - defer cancel() - - if err = s.Shutdown(ctx); err != nil { - log.WithFields( - log.Fields{ - "error": fmt.Sprintf("%v", err), - }).Fatal("graceful shutdown failed") - } else { - log.Info("successful graceful shutdown") - } + return s, alertscancel } From 99b0ced1a942ffc85f64cd818b679f3f32f0c80e Mon Sep 17 00:00:00 2001 From: Anders Pearson Date: Tue, 26 Sep 2017 15:43:59 +0100 Subject: [PATCH 5/7] extract loadConfig() function --- hound.go | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/hound.go b/hound.go index 84b1ecd..80f1aac 100644 --- a/hound.go +++ b/hound.go @@ -75,19 +75,10 @@ func main() { flag.StringVar(&configfile, "config", "./config.json", "JSON config file") flag.Parse() - file, err := ioutil.ReadFile(configfile) - if err != nil { - log.Fatal(err) - } - - f := configData{} - err = json.Unmarshal(file, &f) - if err != nil { - log.Fatal(err) - } + f := loadConfig(configfile) var c config - err = envconfig.Process("hound", &c) + err := envconfig.Process("hound", &c) if err != nil { log.Fatal(err.Error()) } @@ -169,6 +160,20 @@ func main() { } } +func loadConfig(configfile string) configData { + file, err := ioutil.ReadFile(configfile) + if err != nil { + log.Fatal(err) + } + + f := configData{} + err = json.Unmarshal(file, &f) + if err != nil { + log.Fatal(err) + } + return f +} + func startServices(ctx context.Context, f configData, c config) (*http.Server, context.CancelFunc) { // initialize all the alerts ac := newAlertsCollection(smtpEmailer{}) From 69c52a8006ce3475d482d6965dfd62b59f2df991 Mon Sep 17 00:00:00 2001 From: Anders Pearson Date: Tue, 26 Sep 2017 15:45:12 +0100 Subject: [PATCH 6/7] load config later nothing from the config file is used until we start up services --- hound.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hound.go b/hound.go index 80f1aac..af88246 100644 --- a/hound.go +++ b/hound.go @@ -75,8 +75,6 @@ func main() { flag.StringVar(&configfile, "config", "./config.json", "JSON config file") flag.Parse() - f := loadConfig(configfile) - var c config err := envconfig.Process("hound", &c) if err != nil { @@ -136,6 +134,8 @@ func main() { }() bgcontext := context.Background() + + f := loadConfig(configfile) s, alertscancel := startServices(bgcontext, f, c) // wait for a SIGTERM From ad4b75f0b162ae11e7401bfd629c8818f610ca9f Mon Sep 17 00:00:00 2001 From: Anders Pearson Date: Tue, 26 Sep 2017 16:39:11 +0100 Subject: [PATCH 7/7] reload config on SIGHUP --- hound.go | 106 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 66 insertions(+), 40 deletions(-) diff --git a/hound.go b/hound.go index af88246..6bc5764 100644 --- a/hound.go +++ b/hound.go @@ -133,30 +133,45 @@ func main() { } }() - bgcontext := context.Background() - f := loadConfig(configfile) + + bgcontext := context.Background() s, alertscancel := startServices(bgcontext, f, c) - // wait for a SIGTERM - stop := make(chan os.Signal, 1) - signal.Notify(stop, os.Interrupt, syscall.SIGTERM) - <-stop - - // then gracefully shut everything down. - alertscancel() - - // giving the http server 1 second to close its connections - ctx, cancel := context.WithTimeout(bgcontext, 1*time.Second) - defer cancel() - - if err = s.Shutdown(ctx); err != nil { - log.WithFields( - log.Fields{ - "error": fmt.Sprintf("%v", err), - }).Fatal("graceful shutdown failed") - } else { - log.Info("successful graceful shutdown") + sigs := make(chan os.Signal, 1) + signal.Notify(sigs, os.Interrupt, syscall.SIGTERM, syscall.SIGHUP) + + for { + // wait for a signal + signal := <-sigs + + // shut everything down nicely + // then gracefully shut everything down. + alertscancel() + + // giving the http server 1 second to close its connections + ctx, cancel := context.WithTimeout(bgcontext, 1*time.Second) + + if err = s.Shutdown(ctx); err != nil { + log.WithFields( + log.Fields{ + "error": fmt.Sprintf("%v", err), + }).Fatal("graceful shutdown failed") + } else { + log.Info("successful graceful shutdown") + } + cancel() + if signal == syscall.SIGHUP { + // reload config and restart services + f = loadConfig(configfile) + log.Info("re-read config") + s, alertscancel = startServices(bgcontext, f, c) + log.Info("restarted services") + } else { + // SIGINT or SIGTERM. We're done. + log.Info("exiting") + return + } } } @@ -174,18 +189,9 @@ func loadConfig(configfile string) configData { return f } -func startServices(ctx context.Context, f configData, c config) (*http.Server, context.CancelFunc) { - // initialize all the alerts - ac := newAlertsCollection(smtpEmailer{}) - for _, a := range f.Alerts { - emailTo := a.EmailTo - if emailTo == "" { - emailTo = c.EmailTo - } - ac.addAlert(newAlert(a.Name, a.Metric, a.Type, a.Threshold, a.Direction, httpFetcher{}, emailTo, a.RunBookLink)) - } - - http.HandleFunc("/", +func registerHandlers(ac *alertsCollection, c config) *http.ServeMux { + mux := http.NewServeMux() + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { pr := ac.MakePageResponse() @@ -198,7 +204,7 @@ func startServices(ctx context.Context, f configData, c config) (*http.Server, c t.Execute(w, pr) }) - http.HandleFunc("/alert/", + mux.HandleFunc("/alert/", func(w http.ResponseWriter, r *http.Request) { stringIdx := strings.Split(r.URL.String(), "/")[2] pr := ac.MakeindivPageResponse(stringIdx) @@ -214,20 +220,40 @@ func startServices(ctx context.Context, f configData, c config) (*http.Server, c } t.Execute(w, pr) }) - s := &http.Server{ - Addr: ":" + c.HTTPPort, - ReadTimeout: time.Duration(c.ReadTimeout) * time.Second, - WriteTimeout: time.Duration(c.WriteTimeout) * time.Second, - } + return mux +} +func startAlertsCollection(ctx context.Context, f configData, c config) (*alertsCollection, context.CancelFunc) { + // initialize all the alerts + ac := newAlertsCollection(smtpEmailer{}) + for _, a := range f.Alerts { + emailTo := a.EmailTo + if emailTo == "" { + emailTo = c.EmailTo + } + ac.addAlert(newAlert(a.Name, a.Metric, a.Type, a.Threshold, a.Direction, httpFetcher{}, emailTo, a.RunBookLink)) + } alertsctx, alertscancel := context.WithCancel(ctx) // kick off alerts in the background go ac.Run(alertsctx) + return ac, alertscancel +} + +func startServices(ctx context.Context, f configData, c config) (*http.Server, context.CancelFunc) { + ac, alertscancel := startAlertsCollection(ctx, f, c) + mux := registerHandlers(ac, c) + s := &http.Server{ + Addr: ":" + c.HTTPPort, + Handler: mux, + ReadTimeout: time.Duration(c.ReadTimeout) * time.Second, + WriteTimeout: time.Duration(c.WriteTimeout) * time.Second, + } + // and the http server in the background go func() { - log.Fatal(s.ListenAndServe()) + s.ListenAndServe() }() return s, alertscancel