diff --git a/chart/templates/ws-manager-bridge-deployment.yaml b/chart/templates/ws-manager-bridge-deployment.yaml index 87e4da1877bf22..572ae58ba1170d 100644 --- a/chart/templates/ws-manager-bridge-deployment.yaml +++ b/chart/templates/ws-manager-bridge-deployment.yaml @@ -43,6 +43,7 @@ spec: {{ include "gitpod.databaseWaiter.container" $this | indent 6 }} {{ include "gitpod.msgbusWaiter.container" $this | indent 6 }} containers: +{{ include "gitpod.kube-rbac-proxy" $this | indent 6 }} - name: ws-manager-bridge image: {{ template "gitpod.comp.imageFull" $this }} {{ include "gitpod.container.resources" $this | indent 8 }} diff --git a/chart/templates/ws-proxy-configmap.yaml b/chart/templates/ws-proxy-configmap.yaml index d03072c3b0b13e..52190bbf3a2566 100644 --- a/chart/templates/ws-proxy-configmap.yaml +++ b/chart/templates/ws-proxy-configmap.yaml @@ -65,6 +65,6 @@ data: }, "pprofAddr": ":6060", "readinessProbeAddr": ":60088", - "prometheusAddr": ":60095" + "prometheusAddr": "localhost:9500" } {{- end -}} \ No newline at end of file diff --git a/chart/templates/ws-proxy-deployment.yaml b/chart/templates/ws-proxy-deployment.yaml index da5ae98250af83..6c723d52931b18 100644 --- a/chart/templates/ws-proxy-deployment.yaml +++ b/chart/templates/ws-proxy-deployment.yaml @@ -54,6 +54,7 @@ spec: {{- end }} enableServiceLinks: false containers: +{{ include "gitpod.kube-rbac-proxy" $this | indent 6 }} - name: ws-proxy image: {{ template "gitpod.comp.imageFull" $this }} args: ["run", "-v", "/config/config.json"] diff --git a/components/common-go/go.mod b/components/common-go/go.mod index 9013ac4a95ce88..e512a3e5139bc1 100644 --- a/components/common-go/go.mod +++ b/components/common-go/go.mod @@ -8,6 +8,7 @@ require ( github.com/go-test/deep v1.0.5 github.com/google/go-cmp v0.5.6 github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 + github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 github.com/opentracing/opentracing-go v1.2.0 github.com/prometheus/client_golang v1.11.0 github.com/segmentio/backo-go v0.0.0-20200129164019-23eae7c10bd3 // indirect diff --git a/components/common-go/go.sum b/components/common-go/go.sum index 72796b2d9181ff..ce1eb3b7c8fca3 100644 --- a/components/common-go/go.sum +++ b/components/common-go/go.sum @@ -103,6 +103,8 @@ github.com/googleapis/gnostic v0.5.5/go.mod h1:7+EbHbldMins07ALC74bsA81Ovc97Dwqy github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= diff --git a/components/common-go/grpc/grpc.go b/components/common-go/grpc/grpc.go index 746f6efb595cba..03d80df8a18a5e 100644 --- a/components/common-go/grpc/grpc.go +++ b/components/common-go/grpc/grpc.go @@ -10,7 +10,9 @@ import ( "github.com/gitpod-io/gitpod/common-go/log" grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware" grpc_opentracing "github.com/grpc-ecosystem/go-grpc-middleware/tracing/opentracing" + grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus" "github.com/opentracing/opentracing-go" + "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" "google.golang.org/grpc" "google.golang.org/grpc/backoff" @@ -22,14 +24,38 @@ import ( // grpc library default is 4MB const maxMsgSize = 1024 * 1024 * 16 +var defaultClientOptionsConfig struct { + Metrics *grpc_prometheus.ClientMetrics +} + +// ClientMetrics produces client-side gRPC metrics +func ClientMetrics() prometheus.Collector { + res := grpc_prometheus.NewClientMetrics() + defaultClientOptionsConfig.Metrics = res + return res +} + // DefaultClientOptions returns the default grpc client connection options func DefaultClientOptions() []grpc.DialOption { bfConf := backoff.DefaultConfig bfConf.MaxDelay = 5 * time.Second - return []grpc.DialOption{ - grpc.WithUnaryInterceptor(grpc_opentracing.UnaryClientInterceptor(grpc_opentracing.WithTracer(opentracing.GlobalTracer()))), - grpc.WithStreamInterceptor(grpc_opentracing.StreamClientInterceptor(grpc_opentracing.WithTracer(opentracing.GlobalTracer()))), + var ( + unaryInterceptor = []grpc.UnaryClientInterceptor{ + grpc_opentracing.UnaryClientInterceptor(grpc_opentracing.WithTracer(opentracing.GlobalTracer())), + } + streamInterceptor = []grpc.StreamClientInterceptor{ + grpc_opentracing.StreamClientInterceptor(grpc_opentracing.WithTracer(opentracing.GlobalTracer())), + } + ) + if defaultClientOptionsConfig.Metrics != nil { + unaryInterceptor = append(unaryInterceptor, defaultClientOptionsConfig.Metrics.UnaryClientInterceptor()) + streamInterceptor = append(streamInterceptor, defaultClientOptionsConfig.Metrics.StreamClientInterceptor()) + } + + res := []grpc.DialOption{ + grpc.WithUnaryInterceptor(grpc_middleware.ChainUnaryClient(unaryInterceptor...)), + grpc.WithStreamInterceptor(grpc_middleware.ChainStreamClient(streamInterceptor...)), grpc.WithBlock(), grpc.WithConnectParams(grpc.ConnectParams{ Backoff: bfConf, @@ -41,6 +67,8 @@ func DefaultClientOptions() []grpc.DialOption { }), grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxMsgSize)), } + + return res } // DefaultServerOptions returns the default ServerOption sets options for internal components diff --git a/components/content-service-api/typescript/src/client-call-metrics.ts b/components/content-service-api/typescript/src/client-call-metrics.ts new file mode 100644 index 00000000000000..8011907b518be4 --- /dev/null +++ b/components/content-service-api/typescript/src/client-call-metrics.ts @@ -0,0 +1,88 @@ +/** + * Copyright (c) 2021 Gitpod GmbH. All rights reserved. + * Licensed under the GNU Affero General Public License (AGPL). + * See License-AGPL.txt in the project root for license information. + */ + +import * as grpc from "@grpc/grpc-js"; +import { Status } from "@grpc/grpc-js/build/src/constants"; + +type GrpcMethodType = 'unary' | 'client_stream' | 'server_stream' | 'bidi_stream'; +export interface IGrpcCallMetricsLabels { + service: string, + method: string, + type: GrpcMethodType, +} + +export interface IGrpcCallMetricsLabelsWithCode extends IGrpcCallMetricsLabels { + code: string +} + +export const IClientCallMetrics = Symbol("IClientCallMetrics"); + +export interface IClientCallMetrics { + handled(labels: IGrpcCallMetricsLabelsWithCode) : void; + received(labels: IGrpcCallMetricsLabels) : void; + sent(labels: IGrpcCallMetricsLabels) : void; + started(labels: IGrpcCallMetricsLabels) : void; +} + +export function getGrpcMethodType(requestStream: boolean, responseStream: boolean): GrpcMethodType { + if (requestStream) { + if (responseStream) { + return 'bidi_stream'; + } else { + return 'client_stream'; + } + } else { + if (responseStream) { + return 'server_stream'; + } else { + return 'unary'; + } + } +} + +export function createClientCallMetricsInterceptor(metrics: IClientCallMetrics): grpc.Interceptor { + return (options, nextCall): grpc.InterceptingCall => { + const methodDef = options.method_definition; + const method = methodDef.path.substring(methodDef.path.lastIndexOf('/') + 1); + const service = methodDef.path.substring(1, methodDef.path.length - method.length - 1); + const labels = { + service, + method, + type: getGrpcMethodType(options.method_definition.requestStream, options.method_definition.responseStream) + }; + const requester = new grpc.RequesterBuilder() + .withStart((metadata, listener, next) => { + const newListener = new grpc.ListenerBuilder().withOnReceiveStatus((status, next) => { + try { + metrics.handled({ + ...labels, + code: Status[status.code] + }); + } finally { + next(status); + } + }).withOnReceiveMessage((message, next) => { + try { + metrics.received(labels); + } finally { + next(message); + } + }).build() + try { + metrics.started(labels); + } finally { + next(metadata, newListener); + } + }).withSendMessage((message, next) => { + try { + metrics.sent(labels); + } finally { + next(message); + } + }).build(); + return new grpc.InterceptingCall(nextCall(options), requester); + }; +} diff --git a/components/ee/ws-scheduler/cmd/run.go b/components/ee/ws-scheduler/cmd/run.go index bac7e22d043f78..b175d41572084c 100644 --- a/components/ee/ws-scheduler/cmd/run.go +++ b/components/ee/ws-scheduler/cmd/run.go @@ -15,6 +15,7 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/spf13/cobra" + common_grpc "github.com/gitpod-io/gitpod/common-go/grpc" "github.com/gitpod-io/gitpod/common-go/log" "github.com/gitpod-io/gitpod/common-go/pprof" "github.com/gitpod-io/gitpod/ws-scheduler/pkg/scaler" @@ -40,6 +41,27 @@ var runCmd = &cobra.Command{ } log.Info("connected to Kubernetes") + reg := prometheus.NewRegistry() + if config.Prometheus.Addr != "" { + prometheus.WrapRegistererWithPrefix("gitpod_ws_scheduler_", reg).MustRegister(schedMetrics.AllMetrics...) + reg.MustRegister(common_grpc.ClientMetrics()) + + handler := http.NewServeMux() + handler.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{})) + + go func() { + err := http.ListenAndServe(config.Prometheus.Addr, handler) + if err != nil { + log.WithError(err).Error("Prometheus metrics server failed") + } + }() + log.WithField("addr", config.Prometheus.Addr).Info("started Prometheus metrics server") + } + + if config.PProf.Addr != "" { + go pprof.Serve(config.PProf.Addr) + } + scheduler, err := sched.NewScheduler(config.Scheduler, clientSet) if err != nil { log.WithError(err).Fatal("cannot create scheduler") @@ -59,8 +81,6 @@ var runCmd = &cobra.Command{ log.Info("ws-scheduler shut down") }() - reg := prometheus.NewRegistry() - if config.Scaler.Enabled { controller, err := scaler.NewController(config.Scaler.Controller) if err != nil { @@ -80,25 +100,6 @@ var runCmd = &cobra.Command{ log.WithField("controller", config.Scaler.Controller.Kind).Info("started scaler") } - if config.Prometheus.Addr != "" { - prometheus.WrapRegistererWithPrefix("gitpod_ws_scheduler_", reg).MustRegister(schedMetrics.AllMetrics...) - - handler := http.NewServeMux() - handler.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{})) - - go func() { - err := http.ListenAndServe(config.Prometheus.Addr, handler) - if err != nil { - log.WithError(err).Error("Prometheus metrics server failed") - } - }() - log.WithField("addr", config.Prometheus.Addr).Info("started Prometheus metrics server") - } - - if config.PProf.Addr != "" { - go pprof.Serve(config.PProf.Addr) - } - log.Info("🗓️ ws-scheduler is up and running. Stop with SIGINT or CTRL+C") // Run until we're told to stop diff --git a/components/ee/ws-scheduler/go.mod b/components/ee/ws-scheduler/go.mod index 5300ec28eca221..214a0acee83a46 100644 --- a/components/ee/ws-scheduler/go.mod +++ b/components/ee/ws-scheduler/go.mod @@ -7,7 +7,7 @@ require ( github.com/gitpod-io/gitpod/common-go v0.0.0-00010101000000-000000000000 github.com/gitpod-io/gitpod/content-service/api v0.0.0-00010101000000-000000000000 github.com/gitpod-io/gitpod/ws-manager/api v0.0.0-00010101000000-000000000000 - github.com/go-ozzo/ozzo-validation v3.5.0+incompatible + github.com/go-ozzo/ozzo-validation v3.6.0+incompatible github.com/golang/mock v1.6.0 github.com/google/go-cmp v0.5.6 github.com/google/uuid v1.1.2 @@ -19,8 +19,8 @@ require ( golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 google.golang.org/grpc v1.39.1 google.golang.org/protobuf v1.27.1 - k8s.io/api v0.22.0 - k8s.io/apimachinery v0.22.0 + k8s.io/api v0.22.1 + k8s.io/apimachinery v0.22.1 k8s.io/client-go v0.22.0 k8s.io/component-helpers v0.22.0 ) @@ -37,6 +37,7 @@ require ( github.com/google/gofuzz v1.1.0 // indirect github.com/googleapis/gnostic v0.5.5 // indirect github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect + github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect github.com/imdario/mergo v0.3.5 // indirect github.com/inconshreveable/mousetrap v1.0.0 // indirect github.com/json-iterator/go v1.1.11 // indirect diff --git a/components/ee/ws-scheduler/go.sum b/components/ee/ws-scheduler/go.sum index 50b54b8fcd9eee..28c6339ec66ace 100644 --- a/components/ee/ws-scheduler/go.sum +++ b/components/ee/ws-scheduler/go.sum @@ -112,8 +112,8 @@ github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTg github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= github.com/go-openapi/jsonreference v0.19.3/go.mod h1:rjx6GuL8TTa9VaixXglHmQmIL98+wF9xc8zWvFonSJ8= github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= -github.com/go-ozzo/ozzo-validation v3.5.0+incompatible h1:sUy/in/P6askYr16XJgTKq/0SZhiWsdg4WZGaLsGQkM= -github.com/go-ozzo/ozzo-validation v3.5.0+incompatible/go.mod h1:gsEKFIVnabGBt6mXmxK0MoFy+cZoTJY6mu5Ll3LVLBU= +github.com/go-ozzo/ozzo-validation v3.6.0+incompatible h1:msy24VGS42fKO9K1vLz82/GeYW1cILu7Nuuj1N3BBkE= +github.com/go-ozzo/ozzo-validation v3.6.0+incompatible/go.mod h1:gsEKFIVnabGBt6mXmxK0MoFy+cZoTJY6mu5Ll3LVLBU= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= @@ -184,6 +184,7 @@ github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:Fecb github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= diff --git a/components/gitpod-protocol/src/messaging/client-call-metrics.ts b/components/gitpod-protocol/src/messaging/client-call-metrics.ts new file mode 100644 index 00000000000000..47cd048d353e4c --- /dev/null +++ b/components/gitpod-protocol/src/messaging/client-call-metrics.ts @@ -0,0 +1,97 @@ +/** + * Copyright (c) 2021 Gitpod GmbH. All rights reserved. + * Licensed under the GNU Affero General Public License (AGPL). + * See License-AGPL.txt in the project root for license information. + */ + +import { injectable } from 'inversify'; +import * as prometheusClient from 'prom-client'; + +type GrpcMethodType = 'unary' | 'client_stream' | 'server_stream' | 'bidi_stream'; +export interface IGrpcCallMetricsLabels { + service: string, + method: string, + type: GrpcMethodType, +} + +export interface IGrpcCallMetricsLabelsWithCode extends IGrpcCallMetricsLabels { + code: string +} + +export const IClientCallMetrics = Symbol("IClientCallMetrics"); + +export interface IClientCallMetrics { + started(labels: IGrpcCallMetricsLabels) : void; + sent(labels: IGrpcCallMetricsLabels) : void; + received(labels: IGrpcCallMetricsLabels) : void; + handled(labels: IGrpcCallMetricsLabelsWithCode) : void; +} + +@injectable() +export class PrometheusClientCallMetrics implements IClientCallMetrics { + + readonly startedCounter: prometheusClient.Counter; + readonly sentCounter: prometheusClient.Counter; + readonly receivedCounter: prometheusClient.Counter; + readonly handledCounter: prometheusClient.Counter; + + constructor() { + this.startedCounter = new prometheusClient.Counter({ + name: 'grpc_client_started_total', + help: 'Total number of RPCs started on the client.', + labelNames: ['grpc_service', 'grpc_method', 'grpc_type'], + registers: [prometheusClient.register] + }); + this.sentCounter = new prometheusClient.Counter({ + name: 'grpc_client_msg_sent_total', + help: ' Total number of gRPC stream messages sent by the client.', + labelNames: ['grpc_service', 'grpc_method', 'grpc_type'], + registers: [prometheusClient.register] + }); + this.receivedCounter = new prometheusClient.Counter({ + name: 'grpc_client_msg_received_total', + help: 'Total number of RPC stream messages received by the client.', + labelNames: ['grpc_service', 'grpc_method', 'grpc_type'], + registers: [prometheusClient.register] + }); + this.handledCounter = new prometheusClient.Counter({ + name: 'grpc_client_handled_total', + help: 'Total number of RPCs completed by the client, regardless of success or failure.', + labelNames: ['grpc_service', 'grpc_method', 'grpc_type', 'grpc_code'], + registers: [prometheusClient.register] + }); + } + + started(labels: IGrpcCallMetricsLabels): void { + this.startedCounter.inc({ + grpc_service: labels.service, + grpc_method: labels.method, + grpc_type: labels.type + }); + } + + sent(labels: IGrpcCallMetricsLabels): void { + this.sentCounter.inc({ + grpc_service: labels.service, + grpc_method: labels.method, + grpc_type: labels.type + }); + } + + received(labels: IGrpcCallMetricsLabels): void { + this.receivedCounter.inc({ + grpc_service: labels.service, + grpc_method: labels.method, + grpc_type: labels.type + }); + } + + handled(labels: IGrpcCallMetricsLabelsWithCode): void { + this.handledCounter.inc({ + grpc_service: labels.service, + grpc_method: labels.method, + grpc_type: labels.type, + grpc_code: labels.code + }); + } +} diff --git a/components/image-builder-api/typescript/src/sugar.ts b/components/image-builder-api/typescript/src/sugar.ts index 4cabafe309c035..0a41e600b2fd30 100644 --- a/components/image-builder-api/typescript/src/sugar.ts +++ b/components/image-builder-api/typescript/src/sugar.ts @@ -8,10 +8,11 @@ import { ImageBuilderClient } from "./imgbuilder_grpc_pb"; import { TraceContext } from '@gitpod/gitpod-protocol/lib/util/tracing'; import { Deferred } from "@gitpod/gitpod-protocol/lib/util/deferred"; import { log } from "@gitpod/gitpod-protocol/lib/util/logging"; +import { createClientCallMetricsInterceptor, IClientCallMetrics } from "@gitpod/content-service/lib/client-call-metrics"; import * as opentracing from 'opentracing'; import { Metadata } from "@grpc/grpc-js"; import { BuildRequest, BuildResponse, BuildStatus, LogsRequest, LogsResponse, ResolveWorkspaceImageResponse, ResolveWorkspaceImageRequest, ResolveBaseImageRequest, ResolveBaseImageResponse } from "./imgbuilder_pb"; -import { injectable, inject } from 'inversify'; +import { injectable, inject, optional } from 'inversify'; import * as grpc from "@grpc/grpc-js"; import { TextDecoder } from "util"; @@ -33,6 +34,7 @@ function withTracing(ctx: TraceContext) { } export const ImageBuilderClientConfig = Symbol("ImageBuilderClientConfig"); +export const ImageBuilderClientCallMetrics = Symbol('ImageBuilderCallMetrics') // ImageBuilderClientConfig configures the access to an image builder export interface ImageBuilderClientConfig { @@ -43,14 +45,25 @@ export interface ImageBuilderClientConfig { export class CachingImageBuilderClientProvider implements ImageBuilderClientProvider { @inject(ImageBuilderClientConfig) protected readonly clientConfig: ImageBuilderClientConfig; + @inject(ImageBuilderClientCallMetrics) @optional() + protected readonly clientCallMetrics: IClientCallMetrics; + // gRPC connections maintain their connectivity themselves, i.e. they reconnect when neccesary. // They can also be used concurrently, even across services. // Thus it makes sense to cache them rather than create a new connection for each request. protected connectionCache: PromisifiedImageBuilderClient | undefined; getDefault() { + let interceptor: grpc.Interceptor[] = []; + if (this.clientCallMetrics) { + interceptor = [ createClientCallMetricsInterceptor(this.clientCallMetrics) ]; + } + if (!this.connectionCache || !this.connectionCache.isConnectionAlive()) { - this.connectionCache = new PromisifiedImageBuilderClient(new ImageBuilderClient(this.clientConfig.address, grpc.credentials.createInsecure())); + this.connectionCache = new PromisifiedImageBuilderClient( + new ImageBuilderClient(this.clientConfig.address, grpc.credentials.createInsecure()), + interceptor + ); } return this.connectionCache!; } @@ -68,7 +81,7 @@ export interface StagedBuildResponse { export class PromisifiedImageBuilderClient { - constructor(public readonly client: ImageBuilderClient) { } + constructor(public readonly client: ImageBuilderClient, protected readonly interceptor: grpc.Interceptor[]) { } public isConnectionAlive() { const cs = this.client.getChannel().getConnectivityState(false); @@ -78,7 +91,7 @@ export class PromisifiedImageBuilderClient { public resolveBaseImage(ctx: TraceContext, request: ResolveBaseImageRequest): Promise { return new Promise((resolve, reject) => { const span = TraceContext.startSpan(`/image-builder/resolveBaseImage`, ctx); - this.client.resolveBaseImage(request, withTracing({ span }), (err, resp) => { + this.client.resolveBaseImage(request, withTracing({ span }), this.getDefaultUnaryOptions(), (err, resp) => { if (err) { TraceContext.logError({ span }, err); reject(err); @@ -93,7 +106,7 @@ export class PromisifiedImageBuilderClient { public resolveWorkspaceImage(ctx: TraceContext, request: ResolveWorkspaceImageRequest): Promise { return new Promise((resolve, reject) => { const span = TraceContext.startSpan(`/image-builder/resolveWorkspaceImage`, ctx); - this.client.resolveWorkspaceImage(request, withTracing({ span }), (err, resp) => { + this.client.resolveWorkspaceImage(request, withTracing({ span }), this.getDefaultUnaryOptions(), (err, resp) => { span.finish(); if (err) { TraceContext.logError({ span }, err); @@ -202,4 +215,10 @@ export class PromisifiedImageBuilderClient { }) } + protected getDefaultUnaryOptions(): Partial { + return { + interceptors: this.interceptor, + } + } + } diff --git a/components/image-builder-mk3/cmd/run.go b/components/image-builder-mk3/cmd/run.go index da82edd1bff53b..5bc712304aa4ec 100644 --- a/components/image-builder-mk3/cmd/run.go +++ b/components/image-builder-mk3/cmd/run.go @@ -21,6 +21,8 @@ import ( "github.com/gitpod-io/gitpod/image-builder/pkg/resolve" "github.com/opentracing/opentracing-go" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/spf13/cobra" "google.golang.org/grpc" @@ -34,6 +36,34 @@ var runCmd = &cobra.Command{ cfg := getConfig() common_grpc.SetupLogging() + var promreg prometheus.Registerer + if cfg.Prometheus.Addr != "" { + reg := prometheus.NewRegistry() + promreg = reg + + handler := http.NewServeMux() + handler.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{})) + + // BEWARE: for the gRPC client side metrics to work it's important to call common_grpc.ClientMetrics() + // before NewOrchestratingBuilder as the latter produces the gRPC client. + reg.MustRegister( + collectors.NewGoCollector(), + collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}), + common_grpc.ClientMetrics(), + ) + + go func() { + err := http.ListenAndServe(cfg.Prometheus.Addr, handler) + if err != nil { + log.WithError(err).Error("Prometheus metrics server failed") + } + }() + log.WithField("addr", cfg.Prometheus.Addr).Info("started Prometheus metrics server") + } + + if cfg.PProf.Addr != "" { + go pprof.Serve(cfg.PProf.Addr) + } ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -57,6 +87,12 @@ var runCmd = &cobra.Command{ go resolver.StartCaching(ctx, interval) service.RefResolver = resolver } + if promreg != nil { + err = service.RegisterMetrics(promreg) + if err != nil { + log.Fatal(err) + } + } err = service.Start(ctx) if err != nil { @@ -89,23 +125,6 @@ var runCmd = &cobra.Command{ }() log.WithField("addr", cfg.Service.Addr).Info("started workspace content server") - if cfg.Prometheus.Addr != "" { - handler := http.NewServeMux() - handler.Handle("/metrics", promhttp.Handler()) - - go func() { - err := http.ListenAndServe(cfg.Prometheus.Addr, handler) - if err != nil { - log.WithError(err).Error("Prometheus metrics server failed") - } - }() - log.WithField("addr", cfg.Prometheus.Addr).Info("started Prometheus metrics server") - } - - if cfg.PProf.Addr != "" { - go pprof.Serve(cfg.PProf.Addr) - } - // run until we're told to stop sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) diff --git a/components/image-builder-mk3/go.mod b/components/image-builder-mk3/go.mod index 077f21b2f5efc8..f8c53ae3b7d6de 100644 --- a/components/image-builder-mk3/go.mod +++ b/components/image-builder-mk3/go.mod @@ -34,6 +34,7 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.2 // indirect github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect + github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect github.com/hashicorp/go-cleanhttp v0.5.1 // indirect github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0 // indirect github.com/inconshreveable/mousetrap v1.0.0 // indirect diff --git a/components/image-builder-mk3/go.sum b/components/image-builder-mk3/go.sum index 5efe1028a233ce..f00741d43f5b08 100644 --- a/components/image-builder-mk3/go.sum +++ b/components/image-builder-mk3/go.sum @@ -409,6 +409,7 @@ github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:Fecb github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= diff --git a/components/image-builder-mk3/pkg/orchestrator/metrics.go b/components/image-builder-mk3/pkg/orchestrator/metrics.go new file mode 100644 index 00000000000000..f7a64042d2efe6 --- /dev/null +++ b/components/image-builder-mk3/pkg/orchestrator/metrics.go @@ -0,0 +1,57 @@ +// Copyright (c) 2021 Gitpod GmbH. All rights reserved. +// Licensed under the GNU Affero General Public License (AGPL). +// See License-AGPL.txt in the project root for license information. + +package orchestrator + +import ( + "strconv" + + "github.com/prometheus/client_golang/prometheus" +) + +// RegisterMetrics registers the metrics of this builder +func (o *Orchestrator) RegisterMetrics(reg prometheus.Registerer) error { + err := reg.Register(o.metrics.imageBuildsDoneTotal) + if err != nil { + return err + } + err = reg.Register(o.metrics.imageBuildsStartedTotal) + if err != nil { + return err + } + return nil +} + +const ( + metricsNamespace = "gitpod" + metricsSubsystem = "image_builder" +) + +type metrics struct { + imageBuildsDoneTotal *prometheus.CounterVec + imageBuildsStartedTotal prometheus.Counter +} + +func newMetrics() *metrics { + return &metrics{ + imageBuildsDoneTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: metricsNamespace, + Subsystem: metricsSubsystem, + Name: "builds_done_total", + }, []string{"success"}), + imageBuildsStartedTotal: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: metricsNamespace, + Subsystem: metricsSubsystem, + Name: "builds_started_total", + }), + } +} + +func (m *metrics) BuildDone(success bool) { + m.imageBuildsDoneTotal.WithLabelValues(strconv.FormatBool(success)).Inc() +} + +func (m *metrics) BuildStarted() { + m.imageBuildsStartedTotal.Inc() +} diff --git a/components/image-builder-mk3/pkg/orchestrator/orchestrator.go b/components/image-builder-mk3/pkg/orchestrator/orchestrator.go index 3f80a6fe1d11f2..f0548994075d0d 100644 --- a/components/image-builder-mk3/pkg/orchestrator/orchestrator.go +++ b/components/image-builder-mk3/pkg/orchestrator/orchestrator.go @@ -175,6 +175,7 @@ func NewOrchestratingBuilder(cfg Configuration) (res *Orchestrator, err error) { logListener: make(map[string]map[logListener]struct{}), censorship: make(map[string][]string), builderAuthKey: builderAuthKey, + metrics: newMetrics(), } o.monitor = newBuildMonitor(o, o.wsman) @@ -198,6 +199,8 @@ type Orchestrator struct { monitor *buildMonitor + metrics *metrics + protocol.UnimplementedImageBuilderServer } diff --git a/components/registry-facade/cmd/run.go b/components/registry-facade/cmd/run.go index 23108e8c8dbad7..d4d021d7bc92f2 100644 --- a/components/registry-facade/cmd/run.go +++ b/components/registry-facade/cmd/run.go @@ -20,10 +20,12 @@ import ( "github.com/containerd/containerd/remotes/docker" "github.com/docker/cli/cli/config/configfile" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/spf13/cobra" "golang.org/x/net/context" + common_grpc "github.com/gitpod-io/gitpod/common-go/grpc" "github.com/gitpod-io/gitpod/common-go/log" "github.com/gitpod-io/gitpod/common-go/pprof" "github.com/gitpod-io/gitpod/registry-facade/pkg/registry" @@ -43,6 +45,34 @@ var runCmd = &cobra.Command{ log.WithError(err).WithField("filename", configPath).Fatal("cannot load config") } + promreg := prometheus.NewRegistry() + gpreg := prometheus.WrapRegistererWithPrefix("gitpod_registry_facade_", promreg) + rtt, err := registry.NewMeasuringRegistryRoundTripper(newDefaultTransport(), prometheus.WrapRegistererWithPrefix("downstream_", gpreg)) + if err != nil { + log.WithError(err).Fatal("cannot register metrics") + } + if cfg.PrometheusAddr != "" { + promreg.MustRegister( + collectors.NewGoCollector(), + collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}), + common_grpc.ClientMetrics(), + ) + + handler := http.NewServeMux() + handler.Handle("/metrics", promhttp.HandlerFor(promreg, promhttp.HandlerOpts{})) + + go func() { + err := http.ListenAndServe(cfg.PrometheusAddr, handler) + if err != nil { + log.WithError(err).Error("Prometheus metrics server failed") + } + }() + log.WithField("addr", cfg.PrometheusAddr).Info("started Prometheus metrics server") + } + if cfg.PProfAddr != "" { + go pprof.Serve(cfg.PProfAddr) + } + var dockerCfg *configfile.ConfigFile if cfg.AuthCfg != "" { authCfg := cfg.AuthCfg @@ -63,13 +93,6 @@ var runCmd = &cobra.Command{ log.WithField("fn", authCfg).Info("using authentication for backing registries") } - promreg := prometheus.NewRegistry() - gpreg := prometheus.WrapRegistererWithPrefix("gitpod_registry_facade_", promreg) - rtt, err := registry.NewMeasuringRegistryRoundTripper(newDefaultTransport(), prometheus.WrapRegistererWithPrefix("downstream_", gpreg)) - if err != nil { - log.WithError(err).Fatal("cannot registry metrics") - } - resolverProvider := func() remotes.Resolver { var resolverOpts docker.ResolverOptions if dockerCfg != nil { @@ -95,27 +118,6 @@ var runCmd = &cobra.Command{ reg.MustServe() }() - if cfg.PProfAddr != "" { - go pprof.Serve(cfg.PProfAddr) - } - if cfg.PrometheusAddr != "" { - promreg.MustRegister( - prometheus.NewGoCollector(), - prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}), - ) - - handler := http.NewServeMux() - handler.Handle("/metrics", promhttp.HandlerFor(promreg, promhttp.HandlerOpts{})) - - go func() { - err := http.ListenAndServe(cfg.PrometheusAddr, handler) - if err != nil { - log.WithError(err).Error("Prometheus metrics server failed") - } - }() - log.WithField("addr", cfg.PrometheusAddr).Info("started Prometheus metrics server") - } - log.Info("🏪 registry facade is up and running") sigChan := make(chan os.Signal, 1) signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM) diff --git a/components/registry-facade/go.mod b/components/registry-facade/go.mod index f7f0bc670f4182..22886821becfa8 100644 --- a/components/registry-facade/go.mod +++ b/components/registry-facade/go.mod @@ -31,6 +31,7 @@ require ( github.com/go-test/deep v1.0.5 // indirect github.com/golang/protobuf v1.5.2 // indirect github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect + github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect github.com/inconshreveable/mousetrap v1.0.0 // indirect github.com/klauspost/compress v1.11.13 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect diff --git a/components/registry-facade/go.sum b/components/registry-facade/go.sum index f453c43ec45f4a..b4c26d8590ed6c 100644 --- a/components/registry-facade/go.sum +++ b/components/registry-facade/go.sum @@ -383,6 +383,7 @@ github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:Fecb github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= diff --git a/components/server/src/container-module.ts b/components/server/src/container-module.ts index 618e5e57a90e32..0de9d9372c475e 100644 --- a/components/server/src/container-module.ts +++ b/components/server/src/container-module.ts @@ -31,7 +31,7 @@ import { UserService } from './user/user-service'; import { UserDeletionService } from './user/user-deletion-service'; import { WorkspaceDeletionService } from './workspace/workspace-deletion-service'; import { EnvvarPrefixParser } from './workspace/envvar-prefix-context-parser'; -import { WorkspaceManagerClientProvider } from '@gitpod/ws-manager/lib/client-provider'; +import { IWorkspaceManagerClientCallMetrics, WorkspaceManagerClientProvider } from '@gitpod/ws-manager/lib/client-provider'; import { WorkspaceManagerClientProviderCompositeSource, WorkspaceManagerClientProviderDBSource, WorkspaceManagerClientProviderEnvSource, WorkspaceManagerClientProviderSource } from '@gitpod/ws-manager/lib/client-provider-source'; import { WorkspaceStarter } from './workspace/workspace-starter'; import { TracingManager } from '@gitpod/gitpod-protocol/lib/util/tracing'; @@ -41,7 +41,7 @@ import { ConsensusLeaderMessenger } from './consensus/consensus-leader-messenger import { RabbitMQConsensusLeaderMessenger } from './consensus/rabbitmq-consensus-leader-messenger'; import { ConsensusLeaderQorum } from './consensus/consensus-leader-quorum'; import { StorageClient } from './storage/storage-client'; -import { ImageBuilderClientConfig, ImageBuilderClientProvider, CachingImageBuilderClientProvider } from '@gitpod/image-builder/lib'; +import { ImageBuilderClientConfig, ImageBuilderClientProvider, CachingImageBuilderClientProvider, ImageBuilderClientCallMetrics } from '@gitpod/image-builder/lib'; import { ImageSourceProvider } from './workspace/image-source-provider'; import { WorkspaceGarbageCollector } from './workspace/garbage-collector'; import { TokenGarbageCollector } from './user/token-garbage-collector'; @@ -81,6 +81,8 @@ import { NewsletterSubscriptionController } from './user/newsletter-subscription import { Config, ConfigFile } from './config'; import { defaultGRPCOptions } from '@gitpod/gitpod-protocol/lib/util/grpc'; import { IDEConfigService } from './ide-config'; +import { PrometheusClientCallMetrics } from "@gitpod/gitpod-protocol/lib/messaging/client-call-metrics"; +import { IClientCallMetrics } from '@gitpod/content-service/lib/client-call-metrics'; export const productionContainerModule = new ContainerModule((bind, unbind, isBound, rebind) => { bind(Config).toConstantValue(ConfigFile.fromFile()); @@ -131,12 +133,16 @@ export const productionContainerModule = new ContainerModule((bind, unbind, isBo } ).inSingletonScope(); + bind(PrometheusClientCallMetrics).toSelf().inSingletonScope(); + bind(IClientCallMetrics).to(PrometheusClientCallMetrics).inSingletonScope(); + bind(ImageBuilderClientConfig).toDynamicValue(ctx => { const config = ctx.container.get(Config); return { address: config.imageBuilderAddr } }); bind(CachingImageBuilderClientProvider).toSelf().inSingletonScope(); bind(ImageBuilderClientProvider).toService(CachingImageBuilderClientProvider); + bind(ImageBuilderClientCallMetrics).toService(IClientCallMetrics); /* The binding order of the context parser does not configure preference/a working order. Each context parser must be able * to decide for themselves, independently and without overlap to the other parsers what to do. @@ -167,6 +173,7 @@ export const productionContainerModule = new ContainerModule((bind, unbind, isBo bind(WorkspaceManagerClientProviderCompositeSource).toSelf().inSingletonScope(); bind(WorkspaceManagerClientProviderSource).to(WorkspaceManagerClientProviderEnvSource).inSingletonScope(); bind(WorkspaceManagerClientProviderSource).to(WorkspaceManagerClientProviderDBSource).inSingletonScope(); + bind(IWorkspaceManagerClientCallMetrics).toService(IClientCallMetrics); bind(TheiaPluginService).toSelf().inSingletonScope(); diff --git a/components/ws-manager-api/typescript/src/client-provider.ts b/components/ws-manager-api/typescript/src/client-provider.ts index 14c5c430382dd7..3cca85d0739b26 100644 --- a/components/ws-manager-api/typescript/src/client-provider.ts +++ b/components/ws-manager-api/typescript/src/client-provider.ts @@ -4,21 +4,27 @@ * See License-AGPL.txt in the project root for license information. */ -import * as grpc from "@grpc/grpc-js"; -import { injectable, inject } from 'inversify'; -import { WorkspaceManagerClient } from './core_grpc_pb'; -import { PromisifiedWorkspaceManagerClient, linearBackoffStrategy } from "./promisified-client"; +import { createClientCallMetricsInterceptor, IClientCallMetrics } from "@gitpod/content-service/lib/client-call-metrics"; import { Disposable, User, Workspace, WorkspaceInstance } from "@gitpod/gitpod-protocol"; +import { defaultGRPCOptions } from '@gitpod/gitpod-protocol/lib/util/grpc'; +import { log } from '@gitpod/gitpod-protocol/lib/util/logging'; import { WorkspaceClusterWoTLS, WorkspaceManagerConnectionInfo } from '@gitpod/gitpod-protocol/lib/workspace-cluster'; +import * as grpc from "@grpc/grpc-js"; +import { inject, injectable, optional } from 'inversify'; import { WorkspaceManagerClientProviderCompositeSource, WorkspaceManagerClientProviderSource } from "./client-provider-source"; -import { log } from '@gitpod/gitpod-protocol/lib/util/logging'; -import { defaultGRPCOptions } from '@gitpod/gitpod-protocol/lib/util/grpc'; +import { WorkspaceManagerClient } from './core_grpc_pb'; +import { linearBackoffStrategy, PromisifiedWorkspaceManagerClient } from "./promisified-client"; + +export const IWorkspaceManagerClientCallMetrics = Symbol('IWorkspaceManagerClientCallMetrics') @injectable() export class WorkspaceManagerClientProvider implements Disposable { @inject(WorkspaceManagerClientProviderCompositeSource) protected readonly source: WorkspaceManagerClientProviderSource; + @inject(IWorkspaceManagerClientCallMetrics) @optional() + protected readonly clientCallMetrics: IClientCallMetrics; + // gRPC connections maintain their connectivity themselves, i.e. they reconnect when neccesary. // They can also be used concurrently, even across services. // Thus it makes sense to cache them rather than create a new connection for each request. @@ -29,7 +35,7 @@ export class WorkspaceManagerClientProvider implements Disposable { * * @returns The WorkspaceManagerClient that was chosen to start the next workspace with. */ - public async getStartManager(user: User, workspace: Workspace, instance: WorkspaceInstance): Promise<{ manager: PromisifiedWorkspaceManagerClient, installation: string}> { + public async getStartManager(user: User, workspace: Workspace, instance: WorkspaceInstance): Promise<{ manager: PromisifiedWorkspaceManagerClient, installation: string }> { const availableCluster = await this.getAvailableStartCluster(user, workspace, instance); const chosenCluster = chooseCluster(availableCluster); const grpcOptions: grpc.ClientOptions = { @@ -66,7 +72,7 @@ export class WorkspaceManagerClientProvider implements Disposable { const info = await getConnectionInfo(); client = this.createClient(info, grpcOptions); this.connectionCache.set(name, client); - } else if(client.getChannel().getConnectivityState(true) != grpc.connectivityState.READY) { + } else if (client.getChannel().getConnectivityState(true) != grpc.connectivityState.READY) { client.close(); console.warn(`Lost connection to workspace manager \"${name}\" - attempting to reestablish`); @@ -75,8 +81,13 @@ export class WorkspaceManagerClientProvider implements Disposable { this.connectionCache.set(name, client); } + let interceptor: grpc.Interceptor[] = []; + if (this.clientCallMetrics) { + interceptor = [ createClientCallMetricsInterceptor(this.clientCallMetrics) ]; + } + const stopSignal = { stop: false }; - return new PromisifiedWorkspaceManagerClient(client, linearBackoffStrategy(30, 1000, stopSignal), stopSignal); + return new PromisifiedWorkspaceManagerClient(client, linearBackoffStrategy(30, 1000, stopSignal), interceptor, stopSignal); } /** @@ -98,7 +109,7 @@ export class WorkspaceManagerClientProvider implements Disposable { credentials = grpc.credentials.createInsecure(); } - const options = { + const options: Partial = { ...grpcOptions, 'grpc.ssl_target_name_override': "ws-manager", // this makes sure we can call ws-manager with a URL different to "ws-manager" }; @@ -110,6 +121,8 @@ export class WorkspaceManagerClientProvider implements Disposable { } } + + /** * * @param clusters diff --git a/components/ws-manager-api/typescript/src/promisified-client.ts b/components/ws-manager-api/typescript/src/promisified-client.ts index 7e6c04fee9e772..e4c47f6a05c9bb 100644 --- a/components/ws-manager-api/typescript/src/promisified-client.ts +++ b/components/ws-manager-api/typescript/src/promisified-client.ts @@ -64,6 +64,7 @@ export class PromisifiedWorkspaceManagerClient implements Disposable { constructor( public readonly client: WorkspaceManagerClient, protected readonly retryIfUnavailable: RetryStrategy = noRetry, + protected readonly interceptor: grpc.Interceptor[], protected readonly stopSignal?: { stop: boolean }) { } public startWorkspace(ctx: TraceContext, request: StartWorkspaceRequest): Promise { @@ -180,7 +181,7 @@ export class PromisifiedWorkspaceManagerClient implements Disposable { return this.retryIfUnavailable((attempt: number) => new Promise((resolve, reject) => { const span = TraceContext.startSpan(`/ws-manager/takeSnapshot`, ctx); span.log({attempt}); - this.client.takeSnapshot(request, withTracing({span}), (err, resp) => { + this.client.takeSnapshot(request, withTracing({span}), this.getDefaultUnaryOptions(), (err, resp) => { span.finish(); if (err) { reject(err); @@ -196,7 +197,7 @@ export class PromisifiedWorkspaceManagerClient implements Disposable { return this.retryIfUnavailable((attempt: number) => new Promise((resolve, reject) => { const span = TraceContext.startSpan(`/ws-manager/controlAdmission`, ctx); span.log({attempt}); - this.client.controlAdmission(request, withTracing({span}), (err, resp) => { + this.client.controlAdmission(request, withTracing({span}), this.getDefaultUnaryOptions(), (err, resp) => { span.finish(); if (err) { reject(err); @@ -221,7 +222,8 @@ export class PromisifiedWorkspaceManagerClient implements Disposable { protected getDefaultUnaryOptions(): Partial { let deadline = new Date(new Date().getTime() + 30000); return { - deadline + deadline, + interceptors: this.interceptor, } } diff --git a/components/ws-manager-bridge/BUILD.yaml b/components/ws-manager-bridge/BUILD.yaml index 342c9134d65ace..22e9b120e0cac9 100644 --- a/components/ws-manager-bridge/BUILD.yaml +++ b/components/ws-manager-bridge/BUILD.yaml @@ -5,6 +5,7 @@ packages: - "**/*.ts" - package.json deps: + - components/content-service-api/typescript:lib - components/gitpod-db:lib - components/gitpod-messagebus:lib - components/gitpod-protocol:lib diff --git a/components/ws-manager-bridge/src/bridge-controller.ts b/components/ws-manager-bridge/src/bridge-controller.ts index c31db43ec64616..ae954a621af1b7 100644 --- a/components/ws-manager-bridge/src/bridge-controller.ts +++ b/components/ws-manager-bridge/src/bridge-controller.ts @@ -15,6 +15,7 @@ import { WorkspaceCluster } from "@gitpod/gitpod-protocol/lib/workspace-cluster" import { Queue } from "@gitpod/gitpod-protocol"; import { defaultGRPCOptions } from '@gitpod/gitpod-protocol/lib/util/grpc'; import * as grpc from '@grpc/grpc-js'; +import { PrometheusMetricsExporter } from "./prometheus-metrics-exporter"; @injectable() export class BridgeController { @@ -30,6 +31,9 @@ export class BridgeController { @inject(WorkspaceClusterDB) protected readonly db: WorkspaceClusterDB; + @inject(PrometheusMetricsExporter) + protected readonly metrics: PrometheusMetricsExporter; + protected readonly bridges: Map = new Map(); protected readonly reconcileQueue: Queue = new Queue(); protected reconcileTimer: NodeJS.Timeout | undefined = undefined; @@ -76,6 +80,7 @@ export class BridgeController { } } + this.metrics.updateClusterMetrics(Array.from(allClusters).map(c => c[1])); for (const [name, newCluster] of allClusters) { log.info("reconcile: create bridge for new cluster", { name }); const bridge = await this.createAndStartBridge(newCluster); diff --git a/components/ws-manager-bridge/src/container-module.ts b/components/ws-manager-bridge/src/container-module.ts index 11915b7974f08e..c3fee712c3728d 100644 --- a/components/ws-manager-bridge/src/container-module.ts +++ b/components/ws-manager-bridge/src/container-module.ts @@ -17,12 +17,14 @@ import { TracingManager } from '@gitpod/gitpod-protocol/lib/util/tracing'; import { PrometheusMetricsExporter } from './prometheus-metrics-exporter'; import { BridgeController, WorkspaceManagerClientProviderConfigSource } from './bridge-controller'; import { filePathTelepresenceAware } from '@gitpod/gitpod-protocol/lib/env'; -import { WorkspaceManagerClientProvider } from '@gitpod/ws-manager/lib/client-provider'; +import { WorkspaceManagerClientProvider, IWorkspaceManagerClientCallMetrics } from '@gitpod/ws-manager/lib/client-provider'; import { WorkspaceManagerClientProviderCompositeSource, WorkspaceManagerClientProviderDBSource, WorkspaceManagerClientProviderSource } from '@gitpod/ws-manager/lib/client-provider-source'; import { ClusterService, ClusterServiceServer } from './cluster-service-server'; import { IAnalyticsWriter } from '@gitpod/gitpod-protocol/lib/analytics'; import { newAnalyticsWriterFromEnv } from '@gitpod/gitpod-protocol/lib/util/analytics'; import { MetaInstanceController } from './meta-instance-controller'; +import { IClientCallMetrics } from '@gitpod/content-service/lib/client-call-metrics'; +import { PrometheusClientCallMetrics } from "@gitpod/gitpod-protocol/lib/messaging/client-call-metrics"; export const containerModule = new ContainerModule(bind => { @@ -34,6 +36,10 @@ export const containerModule = new ContainerModule(bind => { bind(MetaInstanceController).toSelf().inSingletonScope(); + bind(PrometheusClientCallMetrics).toSelf().inSingletonScope(); + bind(IClientCallMetrics).to(PrometheusClientCallMetrics).inSingletonScope(); + bind(IWorkspaceManagerClientCallMetrics).toService(IClientCallMetrics); + bind(WorkspaceManagerClientProvider).toSelf().inSingletonScope(); bind(WorkspaceManagerClientProviderCompositeSource).toSelf().inSingletonScope(); bind(WorkspaceManagerClientProviderSource).to(WorkspaceManagerClientProviderConfigSource).inSingletonScope(); diff --git a/components/ws-manager-bridge/src/main.ts b/components/ws-manager-bridge/src/main.ts index 031736c763b257..081e542c4d36e0 100644 --- a/components/ws-manager-bridge/src/main.ts +++ b/components/ws-manager-bridge/src/main.ts @@ -34,8 +34,8 @@ export const start = async (container: Container) => { res.send(prometheusClient.register.metrics().toString()); }); const metricsPort = 9500; - const metricsHttpServer = metricsApp.listen(metricsPort, () => { - log.info(`prometheus metrics server running on: ${metricsPort}`); + const metricsHttpServer = metricsApp.listen(metricsPort, 'localhost', () => { + log.info(`prometheus metrics server running on: localhost:${metricsPort}`); }); const bridgeController = container.get(BridgeController); diff --git a/components/ws-manager-bridge/src/prometheus-metrics-exporter.ts b/components/ws-manager-bridge/src/prometheus-metrics-exporter.ts index 77b904b5233fb1..403facf750359a 100644 --- a/components/ws-manager-bridge/src/prometheus-metrics-exporter.ts +++ b/components/ws-manager-bridge/src/prometheus-metrics-exporter.ts @@ -7,11 +7,14 @@ import * as prom from 'prom-client'; import { injectable } from "inversify"; import { WorkspaceInstance } from '@gitpod/gitpod-protocol'; +import { WorkspaceClusterWoTLS } from '@gitpod/gitpod-protocol/src/workspace-cluster'; @injectable() export class PrometheusMetricsExporter { protected readonly workspaceStartupTimeHistogram: prom.Histogram; protected readonly timeToFirstUserActivityHistogram: prom.Histogram; + protected readonly clusterScore: prom.Gauge; + protected readonly clusterCordoned: prom.Gauge; constructor() { this.workspaceStartupTimeHistogram = new prom.Histogram({ @@ -26,6 +29,16 @@ export class PrometheusMetricsExporter { labelNames: ['region'], buckets: prom.exponentialBuckets(2, 2, 10), }); + this.clusterScore = new prom.Gauge({ + name: 'gitpod_ws_manager_bridge_cluster_score', + help: 'Score of the individual registered workspace cluster', + labelNames: ["workspace_cluster"] + }); + this.clusterCordoned = new prom.Gauge({ + name: 'gitpod_ws_manager_bridge_cluster_cordoned', + help: 'Cordoned status of the individual registered workspace cluster', + labelNames: ["workspace_cluster"] + }); } observeWorkspaceStartupTime(instance: WorkspaceInstance): void { @@ -46,4 +59,15 @@ export class PrometheusMetricsExporter { region: instance.region, }, timeToFirstUserActivity); } -} \ No newline at end of file + + updateClusterMetrics(clusters: WorkspaceClusterWoTLS[]): void { + this.clusterScore.reset(); + this.clusterCordoned.reset(); + + clusters.forEach(cluster => { + this.clusterCordoned.labels(cluster.name).set(cluster.state === 'cordoned' ? 1 : 0); + this.clusterScore.labels(cluster.name).set(cluster.score); + }); + } +} + diff --git a/components/ws-manager/cmd/run.go b/components/ws-manager/cmd/run.go index 0d70a6ad5dc26b..735b99b1648c31 100644 --- a/components/ws-manager/cmd/run.go +++ b/components/ws-manager/cmd/run.go @@ -64,6 +64,10 @@ var runCmd = &cobra.Command{ if cfg.Prometheus.Addr != "" { opts.MetricsBindAddress = cfg.Prometheus.Addr + err := metrics.Registry.Register(common_grpc.ClientMetrics()) + if err != nil { + log.WithError(err).Error("Prometheus metrics incomplete") + } } mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), opts) @@ -103,7 +107,7 @@ var runCmd = &cobra.Command{ defer mgmt.Close() if cfg.Prometheus.Addr != "" { - err := mgmt.RegisterMetrics(metrics.Registry) + err = mgmt.RegisterMetrics(metrics.Registry) if err != nil { log.WithError(err).Error("Prometheus metrics incomplete") } diff --git a/components/ws-manager/pkg/manager/manager.go b/components/ws-manager/pkg/manager/manager.go index e48e7b8c2edab4..75a81195d0d003 100644 --- a/components/ws-manager/pkg/manager/manager.go +++ b/components/ws-manager/pkg/manager/manager.go @@ -1171,6 +1171,7 @@ func (m *Manager) connectToWorkspaceDaemon(ctx context.Context, wso workspaceObj // newWssyncConnectionFactory creates a new wsdaemon connection factory based on the wsmanager configuration func newWssyncConnectionFactory(managerConfig config.Configuration) (grpcpool.Factory, error) { cfg := managerConfig.WorkspaceDaemon + // TODO(cw): add client-side gRPC metrics grpcOpts := common_grpc.DefaultClientOptions() if cfg.TLS.Authority != "" || cfg.TLS.Certificate != "" && cfg.TLS.PrivateKey != "" { ca := cfg.TLS.Authority diff --git a/components/ws-proxy/cmd/run.go b/components/ws-proxy/cmd/run.go index d7dcc393222da8..f4de1e237521bc 100644 --- a/components/ws-proxy/cmd/run.go +++ b/components/ws-proxy/cmd/run.go @@ -12,6 +12,7 @@ import ( "time" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/spf13/cobra" @@ -35,6 +36,28 @@ var runCmd = &cobra.Command{ } common_grpc.SetupLogging() + if cfg.PrometheusAddr != "" { + reg := prometheus.NewRegistry() + reg.MustRegister( + collectors.NewGoCollector(), + collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}), + common_grpc.ClientMetrics(), + ) + + handler := http.NewServeMux() + handler.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{})) + + go func() { + err := http.ListenAndServe(cfg.PrometheusAddr, handler) + if err != nil { + log.WithError(err).Error("Prometheus metrics server failed") + } + }() + log.WithField("addr", cfg.PrometheusAddr).Info("started Prometheus metrics server") + } + if cfg.PProfAddr != "" { + go pprof.Serve(cfg.PProfAddr) + } const wsmanConnectionAttempts = 5 workspaceInfoProvider := proxy.NewRemoteWorkspaceInfoProvider(cfg.WorkspaceInfoProviderConfig) @@ -58,27 +81,6 @@ var runCmd = &cobra.Command{ go proxy.NewWorkspaceProxy(cfg.Ingress, cfg.Proxy, proxy.HostBasedRouter(cfg.Ingress.Header, cfg.Proxy.GitpodInstallation.WorkspaceHostSuffix, cfg.Proxy.GitpodInstallation.WorkspaceHostSuffixRegex), workspaceInfoProvider).MustServe() log.Infof("started proxying on %s", cfg.Ingress.HttpAddress) - if cfg.PProfAddr != "" { - go pprof.Serve(cfg.PProfAddr) - } - if cfg.PrometheusAddr != "" { - reg := prometheus.NewRegistry() - reg.MustRegister( - prometheus.NewGoCollector(), - prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}), - ) - - handler := http.NewServeMux() - handler.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{})) - - go func() { - err := http.ListenAndServe(cfg.PrometheusAddr, handler) - if err != nil { - log.WithError(err).Error("Prometheus metrics server failed") - } - }() - log.WithField("addr", cfg.PrometheusAddr).Info("started Prometheus metrics server") - } if cfg.ReadinessProbeAddr != "" { go func() { err = http.ListenAndServe(cfg.ReadinessProbeAddr, http.HandlerFunc(func(resp http.ResponseWriter, req *http.Request) { diff --git a/components/ws-proxy/go.mod b/components/ws-proxy/go.mod index 3def28c50c2beb..44acfc117b6d73 100644 --- a/components/ws-proxy/go.mod +++ b/components/ws-proxy/go.mod @@ -8,13 +8,11 @@ require ( github.com/go-ozzo/ozzo-validation v3.6.0+incompatible github.com/golang/mock v1.6.0 github.com/google/go-cmp v0.5.6 - github.com/google/tcpproxy v0.0.0-20200125044825-b6bb9b5b8252 github.com/gorilla/handlers v1.5.1 github.com/gorilla/mux v1.8.0 github.com/prometheus/client_golang v1.11.0 github.com/sirupsen/logrus v1.8.1 github.com/spf13/cobra v1.1.3 - golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 google.golang.org/grpc v1.39.1 ) @@ -27,6 +25,7 @@ require ( github.com/gitpod-io/gitpod/content-service/api v0.0.0-00010101000000-000000000000 // indirect github.com/golang/protobuf v1.5.2 // indirect github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect + github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect github.com/inconshreveable/mousetrap v1.0.0 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect @@ -43,6 +42,7 @@ require ( golang.org/x/net v0.0.0-20210520170846-37e1c6afe023 // indirect golang.org/x/sys v0.0.0-20210616094352-59db8d763f22 // indirect golang.org/x/text v0.3.6 // indirect + golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba // indirect google.golang.org/genproto v0.0.0-20201019141844-1ed22bb0c154 // indirect google.golang.org/protobuf v1.27.1 // indirect ) diff --git a/components/ws-proxy/go.sum b/components/ws-proxy/go.sum index dca40f988d61fe..14afa167ed3f92 100644 --- a/components/ws-proxy/go.sum +++ b/components/ws-proxy/go.sum @@ -114,8 +114,6 @@ github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXi github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/tcpproxy v0.0.0-20200125044825-b6bb9b5b8252 h1:tAkooHvRrtO8kFB6YOPTpLQok3Hfv1DNDXdNqgi29Ao= -github.com/google/tcpproxy v0.0.0-20200125044825-b6bb9b5b8252/go.mod h1:DavVbd41y+b7ukKDmlnPR4nGYmkWXR6vHUkjQNiHPBs= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= @@ -128,6 +126,7 @@ github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/ad github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= diff --git a/dev/gpctl/go.mod b/dev/gpctl/go.mod index b089f2c49de739..68e88e8221a067 100644 --- a/dev/gpctl/go.mod +++ b/dev/gpctl/go.mod @@ -19,7 +19,7 @@ require ( golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 google.golang.org/grpc v1.39.1 google.golang.org/protobuf v1.27.1 - k8s.io/apimachinery v0.22.0 + k8s.io/apimachinery v0.22.1 k8s.io/client-go v0.0.0 ) @@ -56,7 +56,7 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect - k8s.io/api v0.22.0 // indirect + k8s.io/api v0.22.1 // indirect k8s.io/klog/v2 v2.9.0 // indirect k8s.io/utils v0.0.0-20210707171843-4b05e18ac7d9 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.1.2 // indirect diff --git a/test/go.mod b/test/go.mod index 0ffd112bd26085..12dc6a95c68698 100644 --- a/test/go.mod +++ b/test/go.mod @@ -17,8 +17,8 @@ require ( golang.org/x/sys v0.0.0-20210616094352-59db8d763f22 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 google.golang.org/grpc v1.39.1 - k8s.io/api v0.22.0 - k8s.io/apimachinery v0.22.0 + k8s.io/api v0.22.1 + k8s.io/apimachinery v0.22.1 k8s.io/client-go v0.22.0 ) @@ -33,7 +33,7 @@ require ( github.com/dustin/go-humanize v1.0.0 // indirect github.com/fsnotify/fsnotify v1.4.9 // indirect github.com/go-logr/logr v0.4.0 // indirect - github.com/go-ozzo/ozzo-validation v3.5.0+incompatible // indirect + github.com/go-ozzo/ozzo-validation v3.6.0+incompatible // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/mock v1.6.0 // indirect diff --git a/test/go.sum b/test/go.sum index fb91da9d932209..0085627b3788f5 100644 --- a/test/go.sum +++ b/test/go.sum @@ -309,6 +309,7 @@ github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh github.com/go-openapi/swag v0.19.14/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= github.com/go-ozzo/ozzo-validation v3.5.0+incompatible h1:sUy/in/P6askYr16XJgTKq/0SZhiWsdg4WZGaLsGQkM= github.com/go-ozzo/ozzo-validation v3.5.0+incompatible/go.mod h1:gsEKFIVnabGBt6mXmxK0MoFy+cZoTJY6mu5Ll3LVLBU= +github.com/go-ozzo/ozzo-validation v3.6.0+incompatible/go.mod h1:gsEKFIVnabGBt6mXmxK0MoFy+cZoTJY6mu5Ll3LVLBU= github.com/go-sql-driver/mysql v1.5.0 h1:ozyZYNQW3x3HtqT1jira07DN2PArx2v7/mN66gGcHOs= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=