From 12c31e9506edf6c10cec5e977232c1fced6375a2 Mon Sep 17 00:00:00 2001 From: jwierzbo Date: Wed, 27 Oct 2021 16:03:08 +0200 Subject: [PATCH 1/5] TG-153 Exporter - network mesh check --- exporter.go | 14 +++ pkg/deployment/resources/secrets.go | 8 +- pkg/exporter/monitor.go | 151 ++++++++++++++++++++++++++++ pkg/exporter/passthru.go | 3 + pkg/util/k8sutil/constants.go | 8 +- 5 files changed, 178 insertions(+), 6 deletions(-) create mode 100644 pkg/exporter/monitor.go diff --git a/exporter.go b/exporter.go index b39876068..2bef3c21c 100644 --- a/exporter.go +++ b/exporter.go @@ -100,6 +100,20 @@ func cmdExporterCheckE() error { return err } + mon := exporter.NewMonitor(exporterInput.endpoint, func() (string, error) { + if exporterInput.jwtFile == "" { + return "", nil + } + + data, err := ioutil.ReadFile(exporterInput.jwtFile) + if err != nil { + return "", err + } + + return string(data), nil + }, false, 15*time.Second) + go mon.UpdateMonitorStatus() + exporter := exporter.NewExporter(exporterInput.listenAddress, "/metrics", p) if exporterInput.keyfile != "" { if e, err := exporter.WithKeyfile(exporterInput.keyfile); err != nil { diff --git a/pkg/deployment/resources/secrets.go b/pkg/deployment/resources/secrets.go index 3d8fe732c..71eb4ff13 100644 --- a/pkg/deployment/resources/secrets.go +++ b/pkg/deployment/resources/secrets.go @@ -431,9 +431,11 @@ func AppendKeyfileToKeyfolder(ctx context.Context, cachedStatus inspectorInterfa var ( exporterTokenClaims = jg.MapClaims{ - "iss": "arangodb", - "server_id": "exporter", - "allowed_paths": []interface{}{"/_admin/statistics", "/_admin/statistics-description", k8sutil.ArangoExporterInternalEndpoint, k8sutil.ArangoExporterInternalEndpointV2}, + "iss": "arangodb", + "server_id": "exporter", + "allowed_paths": []interface{}{"/_admin/statistics", "/_admin/statistics-description", + k8sutil.ArangoExporterInternalEndpoint, k8sutil.ArangoExporterInternalEndpointV2, + k8sutil.ArangoExporterStatusEndpoint, k8sutil.ArangoExporterClusterHealthEndpoint}, } ) diff --git a/pkg/exporter/monitor.go b/pkg/exporter/monitor.go new file mode 100644 index 000000000..5e9bfecc6 --- /dev/null +++ b/pkg/exporter/monitor.go @@ -0,0 +1,151 @@ +// +// DISCLAIMER +// +// Copyright 2016-2021 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Jakub Wierzbowski +// + +package exporter + +import ( + "encoding/json" + "errors" + "fmt" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" + "io/ioutil" + "net/url" + "os" + "path" + "strings" + "time" + + "github.com/arangodb/go-driver" + + "github.com/rs/zerolog/log" +) + +const ( + monitorMetricTemplate = "arangodb_member_health{role=\"%s\",id=\"%s\"} %d \n" + successRefreshInterval = time.Duration(time.Second * 120) + failRefreshInterval = time.Duration(time.Second * 15) +) + +var currentMembersStatus = "" + +func NewMonitor(arangodbEndpoint string, auth Authentication, sslVerify bool, timeout time.Duration) *monitor { + uri, err := setPath(arangodbEndpoint, k8sutil.ArangoExporterClusterHealthEndpoint) + if err != nil { + log.Error().Err(err).Msgf("Fatal") + os.Exit(1) + } + + return &monitor{ + factory: newHttpClientFactory(arangodbEndpoint, auth, sslVerify, timeout), + healthURI: uri, + } +} + +type monitor struct { + factory httpClientFactory + healthURI *url.URL +} + +func (m monitor) UpdateMonitorStatus() { + for { + sleep := successRefreshInterval + + health, err := m.GetClusterHealth() + if err != nil { + log.Error().Err(err).Msg("GetClusterHealth error") + sleep = failRefreshInterval + } else { + var output strings.Builder + for key, value := range health.Health { + entry, err := m.GetMemberStatus(key, value) + if err != nil { + log.Error().Err(err).Msg("GetMemberStatus error") + sleep = failRefreshInterval + } + output.WriteString(entry) + } + currentMembersStatus = output.String() + } + time.Sleep(sleep) + } +} + +func (m monitor) GetClusterHealth() (*driver.ClusterHealth, error) { + c, req, err := m.factory() + if err != nil { + return nil, err + } + req.URL = m.healthURI + resp, err := c.Do(req) + if err != nil { + return nil, err + } + + defer resp.Body.Close() + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + var result driver.ClusterHealth + if err := json.Unmarshal(body, &result); err != nil { + return nil, err + } + + return &result, err +} + +func (m monitor) GetMemberStatus(id driver.ServerID, member driver.ServerHealth) (string, error) { + result := fmt.Sprintf(monitorMetricTemplate, member.Role, id, 0) + + c, req, err := m.factory() + if err != nil { + return result, err + } + + req.URL, err = setPath(member.Endpoint, k8sutil.ArangoExporterStatusEndpoint) + + resp, err := c.Do(req) + if err != nil { + return result, err + } + + if resp.StatusCode != 200 { + defer resp.Body.Close() + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return result, err + } + return result, errors.New(string(body)) + } + return fmt.Sprintf(monitorMetricTemplate, member.Role, id, 1), nil +} + +func setPath(uri, uriPath string) (*url.URL, error) { + u, err := url.Parse(uri) + if err != nil { + return u, err + } + u.Path = path.Join(uriPath) + u.Scheme = "https" + return u, nil +} diff --git a/pkg/exporter/passthru.go b/pkg/exporter/passthru.go index ec8d8397d..d76e1ba1e 100644 --- a/pkg/exporter/passthru.go +++ b/pkg/exporter/passthru.go @@ -123,6 +123,9 @@ func (p passthru) ServeHTTP(resp http.ResponseWriter, req *http.Request) { // Fix Header response responseStr = strings.ReplaceAll(responseStr, "guage", "gauge") + // Attach monitor data + responseStr = responseStr + currentMembersStatus + resp.WriteHeader(data.StatusCode) _, err = resp.Write([]byte(responseStr)) if err != nil { diff --git a/pkg/util/k8sutil/constants.go b/pkg/util/k8sutil/constants.go index c8b99ce0a..1b1b9e781 100644 --- a/pkg/util/k8sutil/constants.go +++ b/pkg/util/k8sutil/constants.go @@ -29,9 +29,11 @@ const ( ArangoSyncWorkerPort = 8729 ArangoExporterPort = 9101 - ArangoExporterInternalEndpoint = "/_admin/metrics" - ArangoExporterInternalEndpointV2 = "/_admin/metrics/v2" - ArangoExporterDefaultEndpoint = "/metrics" + ArangoExporterStatusEndpoint = "/_api/version" + ArangoExporterClusterHealthEndpoint = "/_admin/cluster/health" + ArangoExporterInternalEndpoint = "/_admin/metrics" + ArangoExporterInternalEndpointV2 = "/_admin/metrics/v2" + ArangoExporterDefaultEndpoint = "/metrics" // K8s constants ClusterIPNone = "None" From cd82636cb3b7f2da75ed925ee33bd42e1cb9575b Mon Sep 17 00:00:00 2001 From: jwierzbo Date: Fri, 29 Oct 2021 09:54:52 +0200 Subject: [PATCH 2/5] TG-153 Exporter - use signal to graceful monitor shutdown --- exporter.go | 5 +++- pkg/exporter/monitor.go | 21 +++++++++++---- pkg/exporter/passthru.go | 5 +++- pkg/util/signal.go | 55 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 7 deletions(-) create mode 100644 pkg/util/signal.go diff --git a/exporter.go b/exporter.go index 2bef3c21c..80977a1a2 100644 --- a/exporter.go +++ b/exporter.go @@ -23,6 +23,7 @@ package main import ( + "context" "io/ioutil" "os" "os/signal" @@ -30,6 +31,7 @@ import ( "time" "github.com/arangodb/kube-arangodb/pkg/exporter" + "github.com/arangodb/kube-arangodb/pkg/util" "github.com/rs/zerolog/log" "github.com/spf13/cobra" @@ -112,7 +114,8 @@ func cmdExporterCheckE() error { return string(data), nil }, false, 15*time.Second) - go mon.UpdateMonitorStatus() + + go mon.UpdateMonitorStatus(util.CreateSignalContext(context.Background())) exporter := exporter.NewExporter(exporterInput.listenAddress, "/metrics", p) if exporterInput.keyfile != "" { diff --git a/pkg/exporter/monitor.go b/pkg/exporter/monitor.go index 5e9bfecc6..9c7b47948 100644 --- a/pkg/exporter/monitor.go +++ b/pkg/exporter/monitor.go @@ -23,18 +23,20 @@ package exporter import ( + "context" "encoding/json" "errors" "fmt" - "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" "io/ioutil" "net/url" "os" "path" "strings" + "sync/atomic" "time" "github.com/arangodb/go-driver" + "github.com/arangodb/kube-arangodb/pkg/util/k8sutil" "github.com/rs/zerolog/log" ) @@ -45,7 +47,7 @@ const ( failRefreshInterval = time.Duration(time.Second * 15) ) -var currentMembersStatus = "" +var currentMembersStatus atomic.Value func NewMonitor(arangodbEndpoint string, auth Authentication, sslVerify bool, timeout time.Duration) *monitor { uri, err := setPath(arangodbEndpoint, k8sutil.ArangoExporterClusterHealthEndpoint) @@ -65,7 +67,8 @@ type monitor struct { healthURI *url.URL } -func (m monitor) UpdateMonitorStatus() { +// UpdateMonitorStatus load monitor metrics for current cluster into currentMembersStatus +func (m monitor) UpdateMonitorStatus(ctx context.Context) { for { sleep := successRefreshInterval @@ -83,12 +86,19 @@ func (m monitor) UpdateMonitorStatus() { } output.WriteString(entry) } - currentMembersStatus = output.String() + currentMembersStatus.Store(output.String()) + } + + select { + case <-ctx.Done(): + return + case <-time.After(sleep): + continue } - time.Sleep(sleep) } } +// GetClusterHealth returns current ArangoDeployment cluster health status func (m monitor) GetClusterHealth() (*driver.ClusterHealth, error) { c, req, err := m.factory() if err != nil { @@ -114,6 +124,7 @@ func (m monitor) GetClusterHealth() (*driver.ClusterHealth, error) { return &result, err } +// GetMemberStatus returns Prometheus monitor metric for specific member func (m monitor) GetMemberStatus(id driver.ServerID, member driver.ServerHealth) (string, error) { result := fmt.Sprintf(monitorMetricTemplate, member.Role, id, 0) diff --git a/pkg/exporter/passthru.go b/pkg/exporter/passthru.go index d76e1ba1e..339f731cc 100644 --- a/pkg/exporter/passthru.go +++ b/pkg/exporter/passthru.go @@ -124,7 +124,10 @@ func (p passthru) ServeHTTP(resp http.ResponseWriter, req *http.Request) { responseStr = strings.ReplaceAll(responseStr, "guage", "gauge") // Attach monitor data - responseStr = responseStr + currentMembersStatus + monitorData := currentMembersStatus.Load() + if monitorData != nil { + responseStr = responseStr + monitorData.(string) + } resp.WriteHeader(data.StatusCode) _, err = resp.Write([]byte(responseStr)) diff --git a/pkg/util/signal.go b/pkg/util/signal.go new file mode 100644 index 000000000..6a46e4b67 --- /dev/null +++ b/pkg/util/signal.go @@ -0,0 +1,55 @@ +// +// DISCLAIMER +// +// Copyright 2016-2021 ArangoDB GmbH, Cologne, Germany +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright holder is ArangoDB GmbH, Cologne, Germany +// +// Author Jakub Wierzbowski +// + +package util + +import ( + "context" + "os" + "os/signal" + "syscall" +) + +// CreateSignalContext creates and returns the context which is closed when one of the provided signal occurs. +// If the provided list of signals is empty, then SIGINT and SIGTERM is used by default. +func CreateSignalContext(ctx context.Context, signals ...os.Signal) context.Context { + if ctx == nil { + ctx = context.Background() + } + ctxSignal, cancelSignal := context.WithCancel(ctx) + sigChannel := make(chan os.Signal) + + if len(signals) > 0 { + signal.Notify(sigChannel, signals...) + } else { + signal.Notify(sigChannel, os.Interrupt, syscall.SIGTERM) + } + + go func() { + // Wait until signal occurs. + <-sigChannel + // Close the context which is used by the caller. + cancelSignal() + }() + + return ctxSignal +} From eb5f4b5a525474c5610a4efb73bd62d56e0a9e83 Mon Sep 17 00:00:00 2001 From: Tomasz Mielech Date: Fri, 29 Oct 2021 11:29:27 +0200 Subject: [PATCH 3/5] fix linter issues --- Makefile | 2 +- pkg/apis/deployment/v1/license_spec.go | 2 +- pkg/apis/deployment/v2alpha1/license_spec.go | 2 +- pkg/exporter/monitor.go | 7 +++++-- pkg/util/signal.go | 3 ++- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 072d56b18..aade2920c 100644 --- a/Makefile +++ b/Makefile @@ -391,7 +391,7 @@ init: tools update-generated $(BIN) vendor .PHONY: tools tools: update-vendor @echo ">> Fetching golangci-lint linter" - @GOBIN=$(GOPATH)/bin go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.40.0 + @go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.42.1 @echo ">> Fetching goimports" @GOBIN=$(GOPATH)/bin go get golang.org/x/tools/cmd/goimports@0bb7e5c47b1a31f85d4f173edc878a8e049764a5 @echo ">> Fetching license check" diff --git a/pkg/apis/deployment/v1/license_spec.go b/pkg/apis/deployment/v1/license_spec.go index c79b4d80c..c7e5a7eb5 100644 --- a/pkg/apis/deployment/v1/license_spec.go +++ b/pkg/apis/deployment/v1/license_spec.go @@ -52,7 +52,7 @@ func (s LicenseSpec) Validate() error { } // SetDefaultsFrom fills all values not set in s with values from other -func (s LicenseSpec) SetDefaultsFrom(other LicenseSpec) { +func (s *LicenseSpec) SetDefaultsFrom(other LicenseSpec) { if !s.HasSecretName() { s.SecretName = util.NewStringOrNil(other.SecretName) } diff --git a/pkg/apis/deployment/v2alpha1/license_spec.go b/pkg/apis/deployment/v2alpha1/license_spec.go index 4a4b016b9..dfaf73e6f 100644 --- a/pkg/apis/deployment/v2alpha1/license_spec.go +++ b/pkg/apis/deployment/v2alpha1/license_spec.go @@ -52,7 +52,7 @@ func (s LicenseSpec) Validate() error { } // SetDefaultsFrom fills all values not set in s with values from other -func (s LicenseSpec) SetDefaultsFrom(other LicenseSpec) { +func (s *LicenseSpec) SetDefaultsFrom(other LicenseSpec) { if !s.HasSecretName() { s.SecretName = util.NewStringOrNil(other.SecretName) } diff --git a/pkg/exporter/monitor.go b/pkg/exporter/monitor.go index 9c7b47948..2ca543fdf 100644 --- a/pkg/exporter/monitor.go +++ b/pkg/exporter/monitor.go @@ -43,8 +43,8 @@ import ( const ( monitorMetricTemplate = "arangodb_member_health{role=\"%s\",id=\"%s\"} %d \n" - successRefreshInterval = time.Duration(time.Second * 120) - failRefreshInterval = time.Duration(time.Second * 15) + successRefreshInterval = time.Second * 120 + failRefreshInterval = time.Second * 15 ) var currentMembersStatus atomic.Value @@ -134,6 +134,9 @@ func (m monitor) GetMemberStatus(id driver.ServerID, member driver.ServerHealth) } req.URL, err = setPath(member.Endpoint, k8sutil.ArangoExporterStatusEndpoint) + if err != nil { + return result, err + } resp, err := c.Do(req) if err != nil { diff --git a/pkg/util/signal.go b/pkg/util/signal.go index 6a46e4b67..d6fb2f00a 100644 --- a/pkg/util/signal.go +++ b/pkg/util/signal.go @@ -36,7 +36,7 @@ func CreateSignalContext(ctx context.Context, signals ...os.Signal) context.Cont ctx = context.Background() } ctxSignal, cancelSignal := context.WithCancel(ctx) - sigChannel := make(chan os.Signal) + sigChannel := make(chan os.Signal, 2) if len(signals) > 0 { signal.Notify(sigChannel, signals...) @@ -47,6 +47,7 @@ func CreateSignalContext(ctx context.Context, signals ...os.Signal) context.Cont go func() { // Wait until signal occurs. <-sigChannel + close(sigChannel) // Close the context which is used by the caller. cancelSignal() }() From f376c3971094b89c6c90cf55ea5db36007154298 Mon Sep 17 00:00:00 2001 From: jwierzbo Date: Fri, 29 Oct 2021 11:50:29 +0200 Subject: [PATCH 4/5] TG-153 Exporter - fix linter errors --- pkg/storage/provisioner/service/provisioner.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/storage/provisioner/service/provisioner.go b/pkg/storage/provisioner/service/provisioner.go index ee2f42089..ca8aa8108 100644 --- a/pkg/storage/provisioner/service/provisioner.go +++ b/pkg/storage/provisioner/service/provisioner.go @@ -84,10 +84,10 @@ func (p *Provisioner) GetInfo(ctx context.Context, localPath string) (provisione } // Available is blocks available * fragment size - available := int64(statfs.Bavail) * statfs.Bsize + available := int64(statfs.Bavail) * int64(statfs.Bsize) // Capacity is total block count * fragment size - capacity := int64(statfs.Blocks) * statfs.Bsize + capacity := int64(statfs.Blocks) * int64(statfs.Bsize) log.Debug(). Str("node-name", p.NodeName). From 9b616fa4d72ffe5df1425538937e85bd0fc39d16 Mon Sep 17 00:00:00 2001 From: jwierzbo Date: Fri, 29 Oct 2021 12:31:27 +0200 Subject: [PATCH 5/5] TG-153 Exporter - fix linter errors - macOS --- pkg/storage/provisioner/service/provisioner.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/storage/provisioner/service/provisioner.go b/pkg/storage/provisioner/service/provisioner.go index ca8aa8108..e9bf33e52 100644 --- a/pkg/storage/provisioner/service/provisioner.go +++ b/pkg/storage/provisioner/service/provisioner.go @@ -84,10 +84,10 @@ func (p *Provisioner) GetInfo(ctx context.Context, localPath string) (provisione } // Available is blocks available * fragment size - available := int64(statfs.Bavail) * int64(statfs.Bsize) + available := int64(statfs.Bavail) * statfs.Bsize // nolint:typecheck // Capacity is total block count * fragment size - capacity := int64(statfs.Blocks) * int64(statfs.Bsize) + capacity := int64(statfs.Blocks) * statfs.Bsize // nolint:typecheck log.Debug(). Str("node-name", p.NodeName).