From 8bc8060d6ff4f351920cfe81473676ee6ee28109 Mon Sep 17 00:00:00 2001 From: Cyril Tovena Date: Thu, 19 Dec 2019 14:01:59 -0500 Subject: [PATCH] improve stackdriver metric type (#1132) * improve stackdriver metric type Signed-off-by: Cyril Tovena --- cmd/allocator/main.go | 8 ++- cmd/controller/main.go | 9 ++- go.mod | 2 +- go.sum | 2 + install/helm/agones/templates/controller.yaml | 12 ++++ .../agones/templates/service/allocation.yaml | 14 ++++- install/helm/agones/values.yaml | 1 + install/yaml/install.yaml | 24 ++++++++ pkg/metrics/exporter.go | 56 +++++++++++++++++-- pkg/metrics/util.go | 29 ++++++++++ pkg/metrics/util_test.go | 48 ++++++++++++++++ site/content/en/docs/Installation/helm.md | 7 ++- 12 files changed, 200 insertions(+), 12 deletions(-) diff --git a/cmd/allocator/main.go b/cmd/allocator/main.go index c488e3b9cc..e0cc7785cc 100644 --- a/cmd/allocator/main.go +++ b/cmd/allocator/main.go @@ -65,6 +65,7 @@ const ( enableStackdriverMetricsFlag = "stackdriver-exporter" enablePrometheusMetricsFlag = "prometheus-exporter" projectIDFlag = "gcp-project-id" + stackdriverLabels = "stackdriver-labels" ) func init() { @@ -333,6 +334,7 @@ type config struct { PrometheusMetrics bool Stackdriver bool GCPProjectID string + StackdriverLabels string } func parseEnvFlags() config { @@ -340,22 +342,26 @@ func parseEnvFlags() config { viper.SetDefault(enablePrometheusMetricsFlag, true) viper.SetDefault(enableStackdriverMetricsFlag, false) viper.SetDefault(projectIDFlag, "") + viper.SetDefault(stackdriverLabels, "") pflag.Bool(enablePrometheusMetricsFlag, viper.GetBool(enablePrometheusMetricsFlag), "Flag to activate metrics of Agones. Can also use PROMETHEUS_EXPORTER env variable.") pflag.Bool(enableStackdriverMetricsFlag, viper.GetBool(enableStackdriverMetricsFlag), "Flag to activate stackdriver monitoring metrics for Agones. 
Can also use STACKDRIVER_EXPORTER env variable.") pflag.String(projectIDFlag, viper.GetString(projectIDFlag), "GCP ProjectID used for Stackdriver, if not specified ProjectID from Application Default Credentials would be used. Can also use GCP_PROJECT_ID env variable.") + pflag.String(stackdriverLabels, viper.GetString(stackdriverLabels), "A set of default labels to add to all stackdriver metrics generated. By default metadata are automatically added using Kubernetes API and GCP metadata enpoint.") pflag.Parse() viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_")) runtime.Must(viper.BindEnv(enablePrometheusMetricsFlag)) runtime.Must(viper.BindEnv(enableStackdriverMetricsFlag)) runtime.Must(viper.BindEnv(projectIDFlag)) + runtime.Must(viper.BindEnv(stackdriverLabels)) runtime.Must(viper.BindPFlags(pflag.CommandLine)) return config{ PrometheusMetrics: viper.GetBool(enablePrometheusMetricsFlag), Stackdriver: viper.GetBool(enableStackdriverMetricsFlag), GCPProjectID: viper.GetString(projectIDFlag), + StackdriverLabels: viper.GetString(stackdriverLabels), } } @@ -371,7 +377,7 @@ func setupMetricsRecorder(conf config) (health healthcheck.Handler, closer func( // Stackdriver metrics if conf.Stackdriver { - sd, err := metrics.RegisterStackdriverExporter(conf.GCPProjectID) + sd, err := metrics.RegisterStackdriverExporter(conf.GCPProjectID, conf.StackdriverLabels) if err != nil { logger.WithError(err).Fatal("Could not register stackdriver exporter") } diff --git a/cmd/controller/main.go b/cmd/controller/main.go index b2b61699b2..1ebfdf0928 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -53,6 +53,7 @@ import ( const ( enableStackdriverMetricsFlag = "stackdriver-exporter" + stackdriverLabels = "stackdriver-labels" enablePrometheusMetricsFlag = "prometheus-exporter" projectIDFlag = "gcp-project-id" sidecarImageFlag = "sidecar-image" @@ -158,7 +159,7 @@ func main() { // Stackdriver metrics if ctlConf.Stackdriver { - sd, err := 
metrics.RegisterStackdriverExporter(ctlConf.GCPProjectID) + sd, err := metrics.RegisterStackdriverExporter(ctlConf.GCPProjectID, ctlConf.StackdriverLabels) if err != nil { logger.WithError(err).Fatal("Could not register stackdriver exporter") } @@ -240,6 +241,8 @@ func parseEnvFlags() config { viper.SetDefault(keyFileFlag, filepath.Join(base, "certs/server.key")) viper.SetDefault(enablePrometheusMetricsFlag, true) viper.SetDefault(enableStackdriverMetricsFlag, false) + viper.SetDefault(stackdriverLabels, "") + viper.SetDefault(projectIDFlag, "") viper.SetDefault(numWorkersFlag, 64) viper.SetDefault(apiServerSustainedQPSFlag, 100) @@ -260,6 +263,7 @@ func parseEnvFlags() config { pflag.String(kubeconfigFlag, viper.GetString(kubeconfigFlag), "Optional. kubeconfig to run the controller out of the cluster. Only use it for debugging as webhook won't works.") pflag.Bool(enablePrometheusMetricsFlag, viper.GetBool(enablePrometheusMetricsFlag), "Flag to activate metrics of Agones. Can also use PROMETHEUS_EXPORTER env variable.") pflag.Bool(enableStackdriverMetricsFlag, viper.GetBool(enableStackdriverMetricsFlag), "Flag to activate stackdriver monitoring metrics for Agones. Can also use STACKDRIVER_EXPORTER env variable.") + pflag.String(stackdriverLabels, viper.GetString(stackdriverLabels), "A set of default labels to add to all stackdriver metrics generated. By default metadata are automatically added using Kubernetes API and GCP metadata enpoint.") pflag.String(projectIDFlag, viper.GetString(projectIDFlag), "GCP ProjectID used for Stackdriver, if not specified ProjectID from Application Default Credentials would be used. 
Can also use GCP_PROJECT_ID env variable.") pflag.Int32(numWorkersFlag, 64, "Number of controller workers per resource type") pflag.Int32(apiServerSustainedQPSFlag, 100, "Maximum sustained queries per second to send to the API server") @@ -282,6 +286,7 @@ func parseEnvFlags() config { runtime.Must(viper.BindEnv(kubeconfigFlag)) runtime.Must(viper.BindEnv(enablePrometheusMetricsFlag)) runtime.Must(viper.BindEnv(enableStackdriverMetricsFlag)) + runtime.Must(viper.BindEnv(stackdriverLabels)) runtime.Must(viper.BindEnv(projectIDFlag)) runtime.Must(viper.BindPFlags(pflag.CommandLine)) runtime.Must(viper.BindEnv(numWorkersFlag)) @@ -321,6 +326,7 @@ func parseEnvFlags() config { LogDir: viper.GetString(logDirFlag), LogLevel: viper.GetString(logLevelFlag), LogSizeLimitMB: int(viper.GetInt32(logSizeLimitMBFlag)), + StackdriverLabels: viper.GetString(stackdriverLabels), } } @@ -335,6 +341,7 @@ type config struct { AlwaysPullSidecar bool PrometheusMetrics bool Stackdriver bool + StackdriverLabels string KeyFile string CertFile string KubeConfig string diff --git a/go.mod b/go.mod index 4bf1f3488f..d4906874b0 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module agones.dev/agones go 1.12 require ( - cloud.google.com/go v0.34.0 // indirect + cloud.google.com/go v0.34.0 contrib.go.opencensus.io/exporter/stackdriver v0.8.0 fortio.org/fortio v1.3.1 github.com/ahmetb/gen-crd-api-reference-docs v0.1.1 diff --git a/go.sum b/go.sum index 0168bcd378..285e35ac38 100644 --- a/go.sum +++ b/go.sum @@ -139,6 +139,8 @@ github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGV github.com/onsi/gomega v1.5.0 h1:izbySO9zDPmjJ8rDjLvkA2zJHIo+HkYXHnf7eN7SSyo= github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8= +github.com/pborman/uuid v0.0.0-20180906182336-adf5a7427709 h1:zNBQb37RGLmJybyMcs983HfUfpkw9OTFD9tbBfAViHE= +github.com/pborman/uuid 
v0.0.0-20180906182336-adf5a7427709/go.mod h1:VyrYX9gd7irzKovcSS6BIIEwPRkP2Wm2m9ufcdFSJ34= github.com/pborman/uuid v1.2.0 h1:J7Q5mO4ysT1dv8hyrUGHb9+ooztCXu1D8MY8DZYsu3g= github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc= diff --git a/install/helm/agones/templates/controller.yaml b/install/helm/agones/templates/controller.yaml index 41520b2569..a3dcc8ca73 100644 --- a/install/helm/agones/templates/controller.yaml +++ b/install/helm/agones/templates/controller.yaml @@ -90,6 +90,8 @@ spec: value: {{ .Values.agones.metrics.prometheusEnabled | quote }} - name: STACKDRIVER_EXPORTER value: {{ .Values.agones.metrics.stackdriverEnabled | quote }} + - name: STACKDRIVER_LABELS + value: {{ .Values.agones.metrics.stackdriverLabels | quote }} - name: GCP_PROJECT_ID value: {{ .Values.agones.metrics.stackdriverProjectID | quote }} - name: SIDECAR_CPU_LIMIT @@ -108,6 +110,16 @@ spec: - name: LOG_SIZE_LIMIT_MB value: {{ .Values.agones.controller.persistentLogsSizeLimitMB | quote }} {{- end }} + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: CONTAINER_NAME + value: "agones-controller" livenessProbe: httpGet: path: /live diff --git a/install/helm/agones/templates/service/allocation.yaml b/install/helm/agones/templates/service/allocation.yaml index 72d78c97d1..13b1fa5208 100644 --- a/install/helm/agones/templates/service/allocation.yaml +++ b/install/helm/agones/templates/service/allocation.yaml @@ -112,6 +112,18 @@ spec: value: {{ .Values.agones.metrics.stackdriverEnabled | quote }} - name: GCP_PROJECT_ID value: {{ .Values.agones.metrics.stackdriverProjectID | quote }} + - name: STACKDRIVER_LABELS + value: {{ .Values.agones.metrics.stackdriverLabels | quote }} + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + 
valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: CONTAINER_NAME + value: "agones-allocator" ports: - name: https containerPort: 8443 @@ -192,7 +204,7 @@ roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole name: agones-allocator - + {{- end }} --- diff --git a/install/helm/agones/values.yaml b/install/helm/agones/values.yaml index c18c1c3743..2ccd3dcf91 100644 --- a/install/helm/agones/values.yaml +++ b/install/helm/agones/values.yaml @@ -20,6 +20,7 @@ agones: prometheusServiceDiscovery: true stackdriverEnabled: false stackdriverProjectID: "" + stackdriverLabels: "" rbacEnabled: true registerServiceAccounts: true registerWebhooks: true diff --git a/install/yaml/install.yaml b/install/yaml/install.yaml index 026fa67004..b52f6d5f6d 100644 --- a/install/yaml/install.yaml +++ b/install/yaml/install.yaml @@ -1137,6 +1137,18 @@ spec: value: "false" - name: GCP_PROJECT_ID value: "" + - name: STACKDRIVER_LABELS + value: "" + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: CONTAINER_NAME + value: "agones-allocator" ports: - name: https containerPort: 8443 @@ -1352,6 +1364,8 @@ spec: value: "true" - name: STACKDRIVER_EXPORTER value: "false" + - name: STACKDRIVER_LABELS + value: "" - name: GCP_PROJECT_ID value: "" - name: SIDECAR_CPU_LIMIT @@ -1368,6 +1382,16 @@ spec: value: "/home/agones/logs" - name: LOG_SIZE_LIMIT_MB value: "10000" + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: CONTAINER_NAME + value: "agones-controller" livenessProbe: httpGet: path: /live diff --git a/pkg/metrics/exporter.go b/pkg/metrics/exporter.go index 4f259fed34..3a6d50784d 100644 --- a/pkg/metrics/exporter.go +++ b/pkg/metrics/exporter.go @@ -16,12 +16,16 @@ package metrics import ( "net/http" + "os" "time" + "cloud.google.com/go/compute/metadata" 
"contrib.go.opencensus.io/exporter/stackdriver" + "github.com/pkg/errors" prom "github.com/prometheus/client_golang/prometheus" "go.opencensus.io/exporter/prometheus" "go.opencensus.io/stats/view" + "google.golang.org/genproto/googleapis/api/monitoredres" ) // RegisterPrometheusExporter register a prometheus exporter to OpenCensus with a given prometheus metric registry. @@ -48,20 +52,30 @@ func RegisterPrometheusExporter(registry *prom.Registry) (http.Handler, error) { // RegisterStackdriverExporter register a Stackdriver exporter to OpenCensus. // It will add Agones metrics into Stackdriver on Google Cloud. -func RegisterStackdriverExporter(projectID string) (sd *stackdriver.Exporter, err error) { - // Default project will be used - sd, err = stackdriver.NewExporter(stackdriver.Options{ +func RegisterStackdriverExporter(projectID string, defaultLabels string) (*stackdriver.Exporter, error) { + monitoredRes, err := getMonitoredResource(projectID) + if err != nil { + logger.WithError(err).Warn("error discovering monitored resource") + } + labels, err := parseLabels(defaultLabels) + if err != nil { + return nil, err + } + + sd, err := stackdriver.NewExporter(stackdriver.Options{ ProjectID: projectID, // MetricPrefix helps uniquely identify your metrics. 
- MetricPrefix: "agones", + MetricPrefix: "agones", + Resource: monitoredRes, + DefaultMonitoringLabels: labels, }) if err != nil { - return + return nil, err } // Register it as a metrics exporter view.RegisterExporter(sd) - return + return sd, nil } // SetReportingPeriod set appropriate reporting period which depends on exporters @@ -79,3 +93,33 @@ func SetReportingPeriod(prometheus, stackdriver bool) { view.SetReportingPeriod(reportingPeriod) } } + +func getMonitoredResource(projectID string) (*monitoredres.MonitoredResource, error) { + instanceID, err := metadata.InstanceID() + if err != nil { + return nil, errors.Wrap(err, "error getting instance ID") + } + zone, err := metadata.Zone() + if err != nil { + return nil, errors.Wrap(err, "error getting zone") + } + clusterName, err := metadata.InstanceAttributeValue("cluster-name") + if err != nil { + return nil, errors.Wrap(err, "error getting cluster-name") + } + + return &monitoredres.MonitoredResource{ + Type: "k8s_container", + Labels: map[string]string{ + "project_id": projectID, + "instance_id": instanceID, + "zone": zone, + "cluster_name": clusterName, + + // See: https://kubernetes.io/docs/tasks/inject-data-application/environment-variable-expose-pod-information/ + "namespace_id": os.Getenv("POD_NAMESPACE"), + "pod_id": os.Getenv("POD_NAME"), + "container_name": os.Getenv("CONTAINER_NAME"), + }, + }, nil +} diff --git a/pkg/metrics/util.go b/pkg/metrics/util.go index 120936e662..189eace637 100644 --- a/pkg/metrics/util.go +++ b/pkg/metrics/util.go @@ -16,8 +16,13 @@ package metrics import ( "context" + "errors" + "fmt" + "strings" + "unicode/utf8" "agones.dev/agones/pkg/util/runtime" + "contrib.go.opencensus.io/exporter/stackdriver" "go.opencensus.io/stats" "go.opencensus.io/tag" ) @@ -48,3 +53,27 @@ func MustTagKey(key string) tag.Key { } return t } + +func parseLabels(s string) (*stackdriver.Labels, error) { + res := &stackdriver.Labels{} + if s == "" { + return res, nil + } + pairs := strings.Split(s, 
",") + for _, p := range pairs { + keyValue := strings.Split(p, "=") + if len(keyValue) != 2 { + return nil, fmt.Errorf("invalid labels: %s, expect key=value,key2=value2", s) + } + key := strings.TrimSpace(keyValue[0]) + value := strings.TrimSpace(keyValue[1]) + if !utf8.ValidString(key) || !utf8.ValidString(value) { + return nil, errors.New("invalid labels: must be a valid utf-8 string") + } + if key == "" || value == "" { + return nil, errors.New("invalid labels: must not be empty string") + } + res.Set(key, value, "") + } + return res, nil +} diff --git a/pkg/metrics/util_test.go b/pkg/metrics/util_test.go index c1c7c7ac78..43e772e942 100644 --- a/pkg/metrics/util_test.go +++ b/pkg/metrics/util_test.go @@ -16,11 +16,13 @@ package metrics import ( "context" + "reflect" "testing" agonesv1 "agones.dev/agones/pkg/apis/agones/v1" autoscalingv1 "agones.dev/agones/pkg/apis/autoscaling/v1" agtesting "agones.dev/agones/pkg/testing" + "contrib.go.opencensus.io/exporter/stackdriver" "github.com/stretchr/testify/assert" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -291,3 +293,49 @@ agones_gameservers_node_count_count 3 agones_nodes_count{empty="false"} 2 agones_nodes_count{empty="true"} 1 ` + +func Test_parseLabels(t *testing.T) { + tests := []struct { + input string + want *stackdriver.Labels + wantErr bool + }{ + // valids + {"", labelsFromMap(nil), false}, + {"a=b", labelsFromMap(map[string]string{"a": "b"}), false}, + {"a=b,c=d", labelsFromMap(map[string]string{"a": "b", "c": "d"}), false}, + {"a=b, c=d", labelsFromMap(map[string]string{"a": "b", "c": "d"}), false}, + {"a=b , c = d ", labelsFromMap(map[string]string{"a": "b", "c": "d"}), false}, + {" a = b , c = d ", labelsFromMap(map[string]string{"a": "b", "c": "d"}), false}, + {" a = b , c = d,c=f ", labelsFromMap(map[string]string{"a": "b", "c": "f"}), false}, + + // errors + {"e", nil, true}, + {"a=b,", nil, true}, + {"a= =,", nil, true}, + {"a=b,c", nil, true}, + {"a=b,c =", nil, true}, 
+ {"a=b , c ==", nil, true}, + {"a=b , c =\xc3\x28", nil, true}, + } + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got, err := parseLabels(tt.input) + if (err != nil) != tt.wantErr { + t.Errorf("parseLabels() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("parseLabels() = %v, want %v", got, tt.want) + } + }) + } +} + +func labelsFromMap(m map[string]string) *stackdriver.Labels { + res := &stackdriver.Labels{} + for k, v := range m { + res.Set(k, v, "") + } + return res +} diff --git a/site/content/en/docs/Installation/helm.md b/site/content/en/docs/Installation/helm.md index 0999e8e809..84b27f805a 100644 --- a/site/content/en/docs/Installation/helm.md +++ b/site/content/en/docs/Installation/helm.md @@ -32,7 +32,7 @@ you can use the helm `--namespace` parameter to specify a different namespace._ When running in production, Agones should be scheduled on a dedicated pool of nodes, distinct from where Game Servers are scheduled for better isolation and resiliency. By default Agones prefers to be scheduled on nodes labeled with `agones.dev/agones-system=true` and tolerates node taint `agones.dev/agones-system=true:NoExecute`. If no dedicated nodes are available, Agones will run on regular nodes, but that's not recommended for production use. For instructions on setting up a dedicated node -pool for Agones, see the [Agones installation instructions]({{< relref "../_index.md" >}}) for your preferred environment. +pool for Agones, see the [Agones installation instructions]({{< relref "../_index.md" >}}) for your preferred environment. The command deploys Agones on the Kubernetes cluster with the default configuration. The [configuration](#configuration) section lists the parameters that can be configured during installation. 
@@ -110,7 +110,10 @@ The following tables lists the configurable parameters of the Agones chart and t | `agones.metrics.prometheusServiceDiscovery` | Adds annotations for Prometheus ServiceDiscovery (and also Strackdriver) | `true` | | `agones.metrics.prometheusEnabled` | Enables controller metrics on port `8080` and path `/metrics` | `true` | | `agones.metrics.stackdriverEnabled` | Enables Stackdriver exporter of controller metrics | `false` | -| `agones.metrics.stackdriverProjectID` | This overrides the default gcp project id for use with Stackdriver | `` | +| `agones.metrics.stackdriverProjectID` | This overrides the default gcp project id for use with stackdriver | `` | +{{% feature publishVersion="1.3.0" %}} +| `agones.metrics.stackdriverLabels` | A set of default labels to add to all stackdriver metrics generated in the form of key-value pairs (`key=value,key2=value2`). By default metadata are automatically added using Kubernetes API and GCP metadata endpoint. | `` | +{{% /feature %}} | `agones.serviceaccount.controller` | Service account name for the controller | `agones-controller` | | `agones.serviceaccount.sdk` | Service account name for the sdk | `agones-sdk` | | `agones.image.registry` | Global image registry for all images | `gcr.io/agones-images` |