From c4f850ac8b45f99d9a475c508bb53895e9fb8c05 Mon Sep 17 00:00:00 2001 From: Bryan Huhta Date: Tue, 30 Apr 2024 17:08:53 -0500 Subject: [PATCH 1/6] Remove old documentation file --- pkg/querier/vcs/github.md | 41 --------------------------------------- 1 file changed, 41 deletions(-) delete mode 100644 pkg/querier/vcs/github.md diff --git a/pkg/querier/vcs/github.md b/pkg/querier/vcs/github.md deleted file mode 100644 index 6361da61ab..0000000000 --- a/pkg/querier/vcs/github.md +++ /dev/null @@ -1,41 +0,0 @@ -# GitHub Integration - -The following diagram shows the flow of data when fetching source code from GitHub. - -```mermaid -sequenceDiagram - autonumber - actor User - participant Client - participant Pyroscope - participant GitHub - User->>Client: Select a Flame graph Node - Client->>Pyroscope: Request Flame graph Data For Node - Pyroscope-->>Client: Flame graph Data - Client-->Client: if the data has source code information - Client->>Pyroscope: Request Github App client ID - Pyroscope-->>Client: Github App client ID - Client->>GitHub: Redirect for Authorization of Pyroscope App - GitHub-->>Client: Send Authorization Code - Client->>Pyroscope: Login to GitHub with Authorization Code - Pyroscope->>GitHub: Request Access Token - GitHub-->>Pyroscope: Send OAuth token - Pyroscope-->>Client: Set OAuth token in cookie - User-->>Client: Select a git version - Client->>Pyroscope: Request code for given git version, file path and repo - Pyroscope->>GitHub: Fetch code - GitHub-->>Pyroscope: Send code - Pyroscope-->>Client: Send code - Client-->>Client: Render code and map line number -``` - -## GitHub App Authorization - -Pyroscope uses GitHub OAuth App to fetch source code for a given git version, file path and repo. This is done by using the [GitHub OAuth App](https://docs.github.com/en/developers/apps/authorizing-oauth-apps) to get an OAuth token for the user. This token is then used to fetch the source code from GitHub. - -Pyroscope supports a new API to fetch Github App client ID. This API is used to get the client ID for the GitHub OAuth App. This client ID is then used to redirect the user to GitHub to authorize Pyroscope to access the user's GitHub account. -Github will redirect the user back to the client with an authorization code. This authorization code is then used to get an OAuth token from GitHub via Pyroscope. - -The OAuth token is never stored by Pyroscope. It is only stored in the user's browser as a cookie. Only Pyroscope can access this cookie as it is encrypted with a secret key that is only known to Pyroscope. - -When requesting a file to Pyroscope and it returns a `401 Unauthorized` response, it means that the OAuth token has expired. At the same, time the user's browser will delete the cookie containing the OAuth token. The user will then have to log in again to GitHub to get a new OAuth token. From 4ad6b364d5dc7ca6dd360efb3dd6a2e7f6f549bd Mon Sep 17 00:00:00 2001 From: Bryan Huhta Date: Thu, 2 May 2024 17:17:44 -0500 Subject: [PATCH 2/6] Add metrics when interacting with the github API --- pkg/querier/querier.go | 2 +- pkg/querier/vcs/client/github.go | 28 +++++++++++++++++++++++----- pkg/querier/vcs/service.go | 29 ++++++++++++++++++++++++++--- 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/pkg/querier/querier.go b/pkg/querier/querier.go index 1ebf7025d9..f71329a950 100644 --- a/pkg/querier/querier.go +++ b/pkg/querier/querier.go @@ -125,7 +125,7 @@ func New(params *NewQuerierParams) (*Querier, error) { params.IngestersRing, ), storeGatewayQuerier: storeGatewayQuerier, - VCSServiceHandler: vcs.New(params.Logger), + VCSServiceHandler: vcs.New(params.Logger, params.Reg), storageBucket: params.StorageBucket, tenantConfigProvider: params.CfgProvider, } diff --git a/pkg/querier/vcs/client/github.go b/pkg/querier/vcs/client/github.go index 263bc6d92a..f415185eab 100644 --- a/pkg/querier/vcs/client/github.go +++ b/pkg/querier/vcs/client/github.go @@ -9,27 +9,37 @@ import ( "connectrpc.com/connect" "github.com/google/go-github/v58/github" + "github.com/prometheus/client_golang/prometheus" "golang.org/x/oauth2" vcsv1 "github.com/grafana/pyroscope/api/gen/proto/go/vcs/v1" ) // GithubClient returns a github client. -func GithubClient(ctx context.Context, token *oauth2.Token) (*githubClient, error) { +func GithubClient(ctx context.Context, token *oauth2.Token, apiDuration *prometheus.HistogramVec, apiRateLimit prometheus.Gauge) (*githubClient, error) { return &githubClient{ - client: github.NewClient(nil).WithAuthToken(token.AccessToken), + client: github.NewClient(nil).WithAuthToken(token.AccessToken), + apiDuration: apiDuration, + apiRateLimit: apiRateLimit, }, nil } type githubClient struct { client *github.Client + + apiDuration *prometheus.HistogramVec + apiRateLimit prometheus.Gauge } func (gh *githubClient) GetCommit(ctx context.Context, owner, repo, ref string) (*vcsv1.GetCommitResponse, error) { - commit, _, err := gh.client.Repositories.GetCommit(ctx, owner, repo, ref, nil) + start := time.Now() + commit, res, err := gh.client.Repositories.GetCommit(ctx, owner, repo, ref, nil) + gh.apiDuration.WithLabelValues("/repos/{owner}/{repo}/commits/{ref}").Observe(time.Since(start).Seconds()) + gh.apiRateLimit.Set(float64(res.Rate.Remaining)) if err != nil { return nil, err } + return &vcsv1.GetCommitResponse{ Sha: toString(commit.SHA), Message: toString(commit.Commit.Message), @@ -45,25 +55,33 @@ func (gh *githubClient) GetFile(ctx context.Context, req FileRequest) (File, err // We could abstract away git provider using git protocol // git clone https://x-access-token:@github.com/owner/repo.git // For now we use the github client. - file, _, _, err := gh.client.Repositories.GetContents(ctx, req.Owner, req.Repo, req.Path, &github.RepositoryContentGetOptions{Ref: req.Ref}) + + start := time.Now() + file, _, res, err := gh.client.Repositories.GetContents(ctx, req.Owner, req.Repo, req.Path, &github.RepositoryContentGetOptions{Ref: req.Ref}) + gh.apiDuration.WithLabelValues("/repos/{owner}/{repo}/contents/{path}").Observe(time.Since(start).Seconds()) + gh.apiRateLimit.Set(float64(res.Rate.Remaining)) if err != nil { var githubErr *github.ErrorResponse - if ok := errors.As(err, &githubErr); ok && githubErr.Response.StatusCode == http.StatusNotFound { + if errors.As(err, &githubErr) && githubErr.Response.StatusCode == http.StatusNotFound { return File{}, fmt.Errorf("%w: %s", ErrNotFound, err) } return File{}, err } + if file == nil { return File{}, ErrNotFound } + // We only support files retrieval. if file.Type != nil && *file.Type != "file" { return File{}, connect.NewError(connect.CodeInvalidArgument, errors.New("path is not a file")) } + content, err := file.GetContent() if err != nil { return File{}, err } + return File{ Content: content, URL: toString(file.HTMLURL), diff --git a/pkg/querier/vcs/service.go b/pkg/querier/vcs/service.go index d4277c7ba3..360cdc7622 100644 --- a/pkg/querier/vcs/service.go +++ b/pkg/querier/vcs/service.go @@ -11,6 +11,8 @@ import ( "github.com/go-kit/log" giturl "github.com/kubescape/go-git-url" "github.com/kubescape/go-git-url/apis" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" "golang.org/x/oauth2" vcsv1 "github.com/grafana/pyroscope/api/gen/proto/go/vcs/v1" @@ -23,11 +25,28 @@ var _ vcsv1connect.VCSServiceHandler = (*Service)(nil) type Service struct { logger log.Logger + + githubAPIDuration *prometheus.HistogramVec + githubAPIRateLimit prometheus.Gauge } -func New(logger log.Logger) *Service { +func New(logger log.Logger, reg prometheus.Registerer) *Service { return &Service{ logger: logger, + githubAPIDuration: promauto.With(reg).NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "pyroscope", + Name: "vcs_github_request_duration", + Help: "Duration of GitHub API requests in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.1, 10, 8), + }, + []string{"path"}, + ), + githubAPIRateLimit: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ + Namespace: "pyroscope", + Name: "vcs_github_rate_limit", + Help: "Remaining GitHub API requests before rate limiting occurs", + }), } } @@ -44,7 +63,9 @@ func (q *Service) GithubLogin(ctx context.Context, req *connect.Request[vcsv1.Gi return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("failed to authorize with GitHub")) } + start := time.Now() token, err := cfg.Exchange(ctx, req.Msg.AuthorizationCode) + q.githubAPIDuration.WithLabelValues("/login/oauth/authorize").Observe(time.Since(start).Seconds()) if err != nil { q.logger.Log("err", err, "msg", "failed to exchange authorization code with GitHub") return nil, connect.NewError(connect.CodeUnauthenticated, fmt.Errorf("failed to authorize with GitHub")) @@ -75,7 +96,9 @@ func (q *Service) GithubRefresh(ctx context.Context, req *connect.Request[vcsv1. return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("failed to refresh token")) } + start := time.Now() githubToken, err := refreshGithubToken(githubRequest) + q.githubAPIDuration.WithLabelValues("/login/oauth/access_token").Observe(time.Since(start).Seconds()) if err != nil { q.logger.Log("err", err, "msg", "failed to refresh token with GitHub") return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("failed to refresh token")) @@ -118,7 +141,7 @@ func (q *Service) GetFile(ctx context.Context, req *connect.Request[vcsv1.GetFil } // todo: we can support multiple provider: bitbucket, gitlab, etc. - ghClient, err := client.GithubClient(ctx, token) + ghClient, err := client.GithubClient(ctx, token, q.githubAPIDuration, q.githubAPIRateLimit) if err != nil { return nil, err } @@ -161,7 +184,7 @@ func (q *Service) GetCommit(ctx context.Context, req *connect.Request[vcsv1.GetC return nil, connect.NewError(connect.CodeInvalidArgument, errors.New("only GitHub repositories are supported")) } - ghClient, err := client.GithubClient(ctx, token) + ghClient, err := client.GithubClient(ctx, token, q.githubAPIDuration, q.githubAPIRateLimit) if err != nil { return nil, err } From da2a7de11fa4af22a13aa8abac0c48f35a5a8e12 Mon Sep 17 00:00:00 2001 From: Bryan Huhta Date: Thu, 2 May 2024 17:52:26 -0500 Subject: [PATCH 3/6] Refactor metrics reporting --- pkg/querier/vcs/client/github.go | 20 ++---- pkg/querier/vcs/client/metrics.go | 103 ++++++++++++++++++++++++++++++ pkg/querier/vcs/service.go | 32 +++------- 3 files changed, 118 insertions(+), 37 deletions(-) create mode 100644 pkg/querier/vcs/client/metrics.go diff --git a/pkg/querier/vcs/client/github.go b/pkg/querier/vcs/client/github.go index f415185eab..a022a025eb 100644 --- a/pkg/querier/vcs/client/github.go +++ b/pkg/querier/vcs/client/github.go @@ -9,33 +9,28 @@ import ( "connectrpc.com/connect" "github.com/google/go-github/v58/github" - "github.com/prometheus/client_golang/prometheus" "golang.org/x/oauth2" vcsv1 "github.com/grafana/pyroscope/api/gen/proto/go/vcs/v1" ) // GithubClient returns a github client. -func GithubClient(ctx context.Context, token *oauth2.Token, apiDuration *prometheus.HistogramVec, apiRateLimit prometheus.Gauge) (*githubClient, error) { +func GithubClient(ctx context.Context, token *oauth2.Token, metrics Metrics) (*githubClient, error) { return &githubClient{ - client: github.NewClient(nil).WithAuthToken(token.AccessToken), - apiDuration: apiDuration, - apiRateLimit: apiRateLimit, + client: github.NewClient(nil).WithAuthToken(token.AccessToken), + metrics: metrics, }, nil } type githubClient struct { - client *github.Client - - apiDuration *prometheus.HistogramVec - apiRateLimit prometheus.Gauge + client *github.Client + metrics Metrics } func (gh *githubClient) GetCommit(ctx context.Context, owner, repo, ref string) (*vcsv1.GetCommitResponse, error) { start := time.Now() commit, res, err := gh.client.Repositories.GetCommit(ctx, owner, repo, ref, nil) - gh.apiDuration.WithLabelValues("/repos/{owner}/{repo}/commits/{ref}").Observe(time.Since(start).Seconds()) - gh.apiRateLimit.Set(float64(res.Rate.Remaining)) + gh.metrics.GetCommitObserve(time.Since(start), res, err) if err != nil { return nil, err } @@ -58,8 +53,7 @@ func (gh *githubClient) GetFile(ctx context.Context, req FileRequest) (File, err start := time.Now() file, _, res, err := gh.client.Repositories.GetContents(ctx, req.Owner, req.Repo, req.Path, &github.RepositoryContentGetOptions{Ref: req.Ref}) - gh.apiDuration.WithLabelValues("/repos/{owner}/{repo}/contents/{path}").Observe(time.Since(start).Seconds()) - gh.apiRateLimit.Set(float64(res.Rate.Remaining)) + gh.metrics.GetFileObserve(time.Since(start), res, err) if err != nil { var githubErr *github.ErrorResponse if errors.As(err, &githubErr) && githubErr.Response.StatusCode == http.StatusNotFound { diff --git a/pkg/querier/vcs/client/metrics.go b/pkg/querier/vcs/client/metrics.go new file mode 100644 index 0000000000..39d427a774 --- /dev/null +++ b/pkg/querier/vcs/client/metrics.go @@ -0,0 +1,103 @@ +package client + +import ( + "fmt" + "time" + + "github.com/google/go-github/v58/github" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +// Metrics can record events surrounding various client actions, like logging in +// or fetching a file. +type Metrics interface { + LoginObserve(elapsed time.Duration, err error) + RefreshObserve(elapsed time.Duration, err error) + GetCommitObserve(elapsed time.Duration, res *github.Response, err error) + GetFileObserve(elapsed time.Duration, res *github.Response, err error) +} + +func NewMetrics(reg prometheus.Registerer) Metrics { + return &githubMetrics{ + APIDuration: promauto.With(reg).NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "pyroscope", + Name: "vcs_github_request_duration", + Help: "Duration of GitHub API requests in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.1, 10, 8), + }, + []string{"path", "status"}, + ), + APIRateLimit: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ + Namespace: "pyroscope", + Name: "vcs_github_rate_limit", + Help: "Remaining GitHub API requests before rate limiting occurs", + }), + } +} + +type githubMetrics struct { + APIDuration *prometheus.HistogramVec + APIRateLimit prometheus.Gauge +} + +func (m *githubMetrics) LoginObserve(elapsed time.Duration, err error) { + // We technically don't know the true status codes of the OAuth login flow, + // but we'll assume "no error" means 200 and "an error" means 400. A 400 is + // chosen because that's what we report to the user. + status := "200" + if err != nil { + status = "400" + } + + m.APIDuration. + WithLabelValues("/login/oauth/authorize", status). + Observe(elapsed.Seconds()) +} + +func (m *githubMetrics) RefreshObserve(elapsed time.Duration, err error) { + // We technically don't know the true status codes of the OAuth login flow, + // but we'll assume "no error" means 200 and "an error" means 400. A 400 is + // chosen because that's what we report to the user. + status := "200" + if err != nil { + status = "400" + } + + m.APIDuration. + WithLabelValues("/login/oauth/access_token", status). + Observe(elapsed.Seconds()) +} + +func (m *githubMetrics) GetCommitObserve(elapsed time.Duration, res *github.Response, err error) { + var status string + if res != nil { + status = fmt.Sprintf("%d", res.StatusCode) + m.APIRateLimit.Set(float64(res.Rate.Remaining)) + } + + if err != nil { + status = "500" + } + + m.APIDuration. + WithLabelValues("/repos/{owner}/{repo}/commits/{ref}", status). + Observe(elapsed.Seconds()) +} + +func (m *githubMetrics) GetFileObserve(elapsed time.Duration, res *github.Response, err error) { + var status string + if res != nil { + status = fmt.Sprintf("%d", res.StatusCode) + m.APIRateLimit.Set(float64(res.Rate.Remaining)) + } + + if err != nil { + status = "500" + } + + m.APIDuration. + WithLabelValues("/repos/{owner}/{repo}/contents/{path}", status). + Observe(elapsed.Seconds()) +} diff --git a/pkg/querier/vcs/service.go b/pkg/querier/vcs/service.go index 360cdc7622..f2d72fb636 100644 --- a/pkg/querier/vcs/service.go +++ b/pkg/querier/vcs/service.go @@ -12,7 +12,6 @@ import ( giturl "github.com/kubescape/go-git-url" "github.com/kubescape/go-git-url/apis" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" "golang.org/x/oauth2" vcsv1 "github.com/grafana/pyroscope/api/gen/proto/go/vcs/v1" @@ -24,29 +23,14 @@ import ( var _ vcsv1connect.VCSServiceHandler = (*Service)(nil) type Service struct { - logger log.Logger - - githubAPIDuration *prometheus.HistogramVec - githubAPIRateLimit prometheus.Gauge + logger log.Logger + metrics client.Metrics } func New(logger log.Logger, reg prometheus.Registerer) *Service { return &Service{ - logger: logger, - githubAPIDuration: promauto.With(reg).NewHistogramVec( - prometheus.HistogramOpts{ - Namespace: "pyroscope", - Name: "vcs_github_request_duration", - Help: "Duration of GitHub API requests in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.1, 10, 8), - }, - []string{"path"}, - ), - githubAPIRateLimit: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Namespace: "pyroscope", - Name: "vcs_github_rate_limit", - Help: "Remaining GitHub API requests before rate limiting occurs", - }), + logger: logger, + metrics: client.NewMetrics(reg), } } @@ -65,7 +49,7 @@ func (q *Service) GithubLogin(ctx context.Context, req *connect.Request[vcsv1.Gi start := time.Now() token, err := cfg.Exchange(ctx, req.Msg.AuthorizationCode) - q.githubAPIDuration.WithLabelValues("/login/oauth/authorize").Observe(time.Since(start).Seconds()) + q.metrics.LoginObserve(time.Since(start), err) if err != nil { q.logger.Log("err", err, "msg", "failed to exchange authorization code with GitHub") return nil, connect.NewError(connect.CodeUnauthenticated, fmt.Errorf("failed to authorize with GitHub")) @@ -98,7 +82,7 @@ func (q *Service) GithubRefresh(ctx context.Context, req *connect.Request[vcsv1. start := time.Now() githubToken, err := refreshGithubToken(githubRequest) - q.githubAPIDuration.WithLabelValues("/login/oauth/access_token").Observe(time.Since(start).Seconds()) + q.metrics.RefreshObserve(time.Since(start), err) if err != nil { q.logger.Log("err", err, "msg", "failed to refresh token with GitHub") return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("failed to refresh token")) @@ -141,7 +125,7 @@ func (q *Service) GetFile(ctx context.Context, req *connect.Request[vcsv1.GetFil } // todo: we can support multiple provider: bitbucket, gitlab, etc. - ghClient, err := client.GithubClient(ctx, token, q.githubAPIDuration, q.githubAPIRateLimit) + ghClient, err := client.GithubClient(ctx, token, q.metrics) if err != nil { return nil, err } @@ -184,7 +168,7 @@ func (q *Service) GetCommit(ctx context.Context, req *connect.Request[vcsv1.GetC return nil, connect.NewError(connect.CodeInvalidArgument, errors.New("only GitHub repositories are supported")) } - ghClient, err := client.GithubClient(ctx, token, q.githubAPIDuration, q.githubAPIRateLimit) + ghClient, err := client.GithubClient(ctx, token, q.metrics) if err != nil { return nil, err } From d2478fcced08883f38a4ef78fd11c1eb197643d9 Mon Sep 17 00:00:00 2001 From: Bryan Huhta Date: Fri, 10 May 2024 17:03:58 -0500 Subject: [PATCH 4/6] Allow http client to be wrapped by multiple instrumentors --- pkg/clientpool/ingester_client_pool.go | 4 ++- pkg/clientpool/store_gateway_client_pool.go | 4 ++- pkg/storegateway/clientpool/client_pool.go | 4 ++- pkg/util/http.go | 33 ++++++++++++++------- 4 files changed, 32 insertions(+), 13 deletions(-) diff --git a/pkg/clientpool/ingester_client_pool.go b/pkg/clientpool/ingester_client_pool.go index 84e9f8bd49..e5511fea5b 100644 --- a/pkg/clientpool/ingester_client_pool.go +++ b/pkg/clientpool/ingester_client_pool.go @@ -59,8 +59,10 @@ func (f *ingesterPoolFactory) FromInstance(inst ring.InstanceDesc) (ring_client. if err != nil { return nil, err } + + httpClient := util.InstrumentedDefaultHTTPClient(util.WithTracingTransport()) return &ingesterPoolClient{ - IngesterServiceClient: ingesterv1connect.NewIngesterServiceClient(util.InstrumentedHTTPClient(), "http://"+inst.Addr, f.options...), + IngesterServiceClient: ingesterv1connect.NewIngesterServiceClient(httpClient, "http://"+inst.Addr, f.options...), HealthClient: grpc_health_v1.NewHealthClient(conn), Closer: conn, }, nil diff --git a/pkg/clientpool/store_gateway_client_pool.go b/pkg/clientpool/store_gateway_client_pool.go index 50b0f59416..617f10afdc 100644 --- a/pkg/clientpool/store_gateway_client_pool.go +++ b/pkg/clientpool/store_gateway_client_pool.go @@ -44,8 +44,10 @@ func (f *storeGatewayPoolFactory) FromInstance(inst ring.InstanceDesc) (ring_cli if err != nil { return nil, err } + + httpClient := util.InstrumentedDefaultHTTPClient(util.WithTracingTransport()) return &storeGatewayPoolClient{ - StoreGatewayServiceClient: storegatewayv1connect.NewStoreGatewayServiceClient(util.InstrumentedHTTPClient(), "http://"+inst.Addr, f.options...), + StoreGatewayServiceClient: storegatewayv1connect.NewStoreGatewayServiceClient(httpClient, "http://"+inst.Addr, f.options...), HealthClient: grpc_health_v1.NewHealthClient(conn), Closer: conn, }, nil diff --git a/pkg/storegateway/clientpool/client_pool.go b/pkg/storegateway/clientpool/client_pool.go index 3e9973f783..d8400eb478 100644 --- a/pkg/storegateway/clientpool/client_pool.go +++ b/pkg/storegateway/clientpool/client_pool.go @@ -66,8 +66,10 @@ func (f *poolFactory) FromInstance(inst ring.InstanceDesc) (ring_client.PoolClie if err != nil { return nil, err } + + httpClient := util.InstrumentedDefaultHTTPClient(util.WithTracingTransport()) return &storeGatewayPoolClient{ - StoreGatewayServiceClient: storegatewayv1connect.NewStoreGatewayServiceClient(util.InstrumentedHTTPClient(), "http://"+inst.Addr, f.options...), + StoreGatewayServiceClient: storegatewayv1connect.NewStoreGatewayServiceClient(httpClient, "http://"+inst.Addr, f.options...), HealthClient: grpc_health_v1.NewHealthClient(conn), Closer: conn, }, nil diff --git a/pkg/util/http.go b/pkg/util/http.go index 109239a0d7..24a12e8e22 100644 --- a/pkg/util/http.go +++ b/pkg/util/http.go @@ -49,20 +49,33 @@ func (f RoundTripperFunc) RoundTrip(req *http.Request) (*http.Response, error) { return f(req) } -// InstrumentedHTTPClient returns a HTTP client with tracing instrumented default RoundTripper. -func InstrumentedHTTPClient() *http.Client { +type RoundTripperInstrumentFunc func(next http.RoundTripper) http.RoundTripper + +// InstrumentedDefaultHTTPClient returns an http client configured with some +// default settings which is wrapped with a variety of instrumented +// RoundTrippers. +func InstrumentedDefaultHTTPClient(instruments ...RoundTripperInstrumentFunc) *http.Client { + transport := defaultTransport + + for i := len(instruments) - 1; i >= 0; i-- { + transport = instruments[i](transport) + } + return &http.Client{ - Transport: WrapWithInstrumentedHTTPTransport(defaultTransport), + Transport: transport, } } -// WrapWithInstrumentedHTTPTransport wraps the given RoundTripper with an tracing instrumented one. -func WrapWithInstrumentedHTTPTransport(next http.RoundTripper) http.RoundTripper { - next = &nethttp.Transport{RoundTripper: next} - return RoundTripperFunc(func(req *http.Request) (*http.Response, error) { - req = nethttp.TraceRequest(opentracing.GlobalTracer(), req) - return next.RoundTrip(req) - }) +// WithTracingTransport wraps the given RoundTripper with a tracing instrumented +// one. +func WithTracingTransport() RoundTripperInstrumentFunc { + return func(next http.RoundTripper) http.RoundTripper { + next = &nethttp.Transport{RoundTripper: next} + return RoundTripperFunc(func(req *http.Request) (*http.Response, error) { + req = nethttp.TraceRequest(opentracing.GlobalTracer(), req) + return next.RoundTrip(req) + }) + } } // WriteYAMLResponse writes some YAML as a HTTP response. From 5916d89b24bb09eff2915930c2f46256a8bdd761 Mon Sep 17 00:00:00 2001 From: Bryan Huhta Date: Thu, 16 May 2024 15:17:46 -0500 Subject: [PATCH 5/6] Add new helpers for creating instrumented http clients --- pkg/util/http.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pkg/util/http.go b/pkg/util/http.go index 24a12e8e22..578af63c4a 100644 --- a/pkg/util/http.go +++ b/pkg/util/http.go @@ -55,15 +55,19 @@ type RoundTripperInstrumentFunc func(next http.RoundTripper) http.RoundTripper // default settings which is wrapped with a variety of instrumented // RoundTrippers. func InstrumentedDefaultHTTPClient(instruments ...RoundTripperInstrumentFunc) *http.Client { - transport := defaultTransport - - for i := len(instruments) - 1; i >= 0; i-- { - transport = instruments[i](transport) + client := &http.Client{ + Transport: defaultTransport, } + return InstrumentedHTTPClient(client, instruments...) +} - return &http.Client{ - Transport: transport, +// InstrumentedHTTPClient adds the associated instrumentation middlewares to the +// provided http client. +func InstrumentedHTTPClient(client *http.Client, instruments ...RoundTripperInstrumentFunc) *http.Client { + for i := len(instruments) - 1; i >= 0; i-- { + client.Transport = instruments[i](client.Transport) } + return client } // WithTracingTransport wraps the given RoundTripper with a tracing instrumented From 3009568356008f996b645606e33737ce4054f719 Mon Sep 17 00:00:00 2001 From: Bryan Huhta Date: Thu, 16 May 2024 15:19:32 -0500 Subject: [PATCH 6/6] Track github api metrics with instrumented http client --- pkg/querier/vcs/client/github.go | 16 +-- pkg/querier/vcs/client/metrics.go | 135 +++++++++++-------------- pkg/querier/vcs/client/metrics_test.go | 78 ++++++++++++++ pkg/querier/vcs/github.go | 5 +- pkg/querier/vcs/github_test.go | 2 +- pkg/querier/vcs/service.go | 20 ++-- 6 files changed, 152 insertions(+), 104 deletions(-) create mode 100644 pkg/querier/vcs/client/metrics_test.go diff --git a/pkg/querier/vcs/client/github.go b/pkg/querier/vcs/client/github.go index a022a025eb..1ccf9578a7 100644 --- a/pkg/querier/vcs/client/github.go +++ b/pkg/querier/vcs/client/github.go @@ -15,22 +15,18 @@ import ( ) // GithubClient returns a github client. -func GithubClient(ctx context.Context, token *oauth2.Token, metrics Metrics) (*githubClient, error) { +func GithubClient(ctx context.Context, token *oauth2.Token, client *http.Client) (*githubClient, error) { return &githubClient{ - client: github.NewClient(nil).WithAuthToken(token.AccessToken), - metrics: metrics, + client: github.NewClient(client).WithAuthToken(token.AccessToken), }, nil } type githubClient struct { - client *github.Client - metrics Metrics + client *github.Client } func (gh *githubClient) GetCommit(ctx context.Context, owner, repo, ref string) (*vcsv1.GetCommitResponse, error) { - start := time.Now() - commit, res, err := gh.client.Repositories.GetCommit(ctx, owner, repo, ref, nil) - gh.metrics.GetCommitObserve(time.Since(start), res, err) + commit, _, err := gh.client.Repositories.GetCommit(ctx, owner, repo, ref, nil) if err != nil { return nil, err } @@ -51,9 +47,7 @@ func (gh *githubClient) GetFile(ctx context.Context, req FileRequest) (File, err // git clone https://x-access-token:@github.com/owner/repo.git // For now we use the github client. - start := time.Now() - file, _, res, err := gh.client.Repositories.GetContents(ctx, req.Owner, req.Repo, req.Path, &github.RepositoryContentGetOptions{Ref: req.Ref}) - gh.metrics.GetFileObserve(time.Since(start), res, err) + file, _, _, err := gh.client.Repositories.GetContents(ctx, req.Owner, req.Repo, req.Path, &github.RepositoryContentGetOptions{Ref: req.Ref}) if err != nil { var githubErr *github.ErrorResponse if errors.As(err, &githubErr) && githubErr.Response.StatusCode == http.StatusNotFound { diff --git a/pkg/querier/vcs/client/metrics.go b/pkg/querier/vcs/client/metrics.go index 39d427a774..f8b28c95ef 100644 --- a/pkg/querier/vcs/client/metrics.go +++ b/pkg/querier/vcs/client/metrics.go @@ -2,102 +2,83 @@ package client import ( "fmt" + "net/http" + "regexp" "time" - "github.com/google/go-github/v58/github" + "github.com/go-kit/log" + "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" -) -// Metrics can record events surrounding various client actions, like logging in -// or fetching a file. -type Metrics interface { - LoginObserve(elapsed time.Duration, err error) - RefreshObserve(elapsed time.Duration, err error) - GetCommitObserve(elapsed time.Duration, res *github.Response, err error) - GetFileObserve(elapsed time.Duration, res *github.Response, err error) -} + "github.com/grafana/pyroscope/pkg/util" +) -func NewMetrics(reg prometheus.Registerer) Metrics { - return &githubMetrics{ - APIDuration: promauto.With(reg).NewHistogramVec( - prometheus.HistogramOpts{ - Namespace: "pyroscope", - Name: "vcs_github_request_duration", - Help: "Duration of GitHub API requests in seconds", - Buckets: prometheus.ExponentialBucketsRange(0.1, 10, 8), - }, - []string{"path", "status"}, - ), - APIRateLimit: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ - Namespace: "pyroscope", - Name: "vcs_github_rate_limit", - Help: "Remaining GitHub API requests before rate limiting occurs", - }), - } -} +var ( + githubRouteMatchers = map[string]*regexp.Regexp{ + // Get repository contents. + // https://docs.github.com/en/rest/repos/contents?apiVersion=2022-11-28#get-repository-content + "/repos/{owner}/{repo}/contents/{path}": regexp.MustCompile(`^\/repos\/\S+\/\S+\/contents\/\S+$`), -type githubMetrics struct { - APIDuration *prometheus.HistogramVec - APIRateLimit prometheus.Gauge -} + // Get a commit. + // https://docs.github.com/en/rest/commits/commits?apiVersion=2022-11-28#get-a-commit + "/repos/{owner}/{repo}/commits/{ref}": regexp.MustCompile(`^\/repos\/\S+\/\S+\/commits\/\S+$`), -func (m *githubMetrics) LoginObserve(elapsed time.Duration, err error) { - // We technically don't know the true status codes of the OAuth login flow, - // but we'll assume "no error" means 200 and "an error" means 400. A 400 is - // chosen because that's what we report to the user. - status := "200" - if err != nil { - status = "400" + // Refresh auth token. + // https://docs.github.com/en/apps/creating-github-apps/authenticating-with-a-github-app/refreshing-user-access-tokens#refreshing-a-user-access-token-with-a-refresh-token + "/login/oauth/access_token": regexp.MustCompile(`^\/login\/oauth\/access_token$`), } +) - m.APIDuration. - WithLabelValues("/login/oauth/authorize", status). - Observe(elapsed.Seconds()) -} +func InstrumentedHTTPClient(logger log.Logger, reg prometheus.Registerer) *http.Client { + apiDuration := promauto.With(reg).NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "pyroscope", + Name: "vcs_github_request_duration", + Help: "Duration of GitHub API requests in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.1, 10, 8), + }, + []string{"method", "route", "status_code"}, + ) -func (m *githubMetrics) RefreshObserve(elapsed time.Duration, err error) { - // We technically don't know the true status codes of the OAuth login flow, - // but we'll assume "no error" means 200 and "an error" means 400. A 400 is - // chosen because that's what we report to the user. - status := "200" - if err != nil { - status = "400" + defaultClient := &http.Client{ + Timeout: 10 * time.Second, + Transport: http.DefaultTransport, } - - m.APIDuration. - WithLabelValues("/login/oauth/access_token", status). - Observe(elapsed.Seconds()) + client := util.InstrumentedHTTPClient(defaultClient, withGitHubMetricsTransport(logger, apiDuration)) + return client } -func (m *githubMetrics) GetCommitObserve(elapsed time.Duration, res *github.Response, err error) { - var status string - if res != nil { - status = fmt.Sprintf("%d", res.StatusCode) - m.APIRateLimit.Set(float64(res.Rate.Remaining)) - } +// withGitHubMetricsTransport wraps a transport with a client to track GitHub +// API usage. +func withGitHubMetricsTransport(logger log.Logger, hv *prometheus.HistogramVec) util.RoundTripperInstrumentFunc { + return func(next http.RoundTripper) http.RoundTripper { + return util.RoundTripperFunc(func(req *http.Request) (*http.Response, error) { + route := matchGitHubAPIRoute(req.URL.Path) + statusCode := "" + start := time.Now() - if err != nil { - status = "500" - } + res, err := next.RoundTrip(req) + if err == nil { + statusCode = fmt.Sprintf("%d", res.StatusCode) + } - m.APIDuration. - WithLabelValues("/repos/{owner}/{repo}/commits/{ref}", status). - Observe(elapsed.Seconds()) -} + if route == "unknown_route" { + level.Warn(logger).Log("path", req.URL.Path, "msg", "unknown GitHub API route") + } + hv.WithLabelValues(req.Method, route, statusCode).Observe(time.Since(start).Seconds()) -func (m *githubMetrics) GetFileObserve(elapsed time.Duration, res *github.Response, err error) { - var status string - if res != nil { - status = fmt.Sprintf("%d", res.StatusCode) - m.APIRateLimit.Set(float64(res.Rate.Remaining)) + return res, err + }) } +} - if err != nil { - status = "500" +func matchGitHubAPIRoute(path string) string { + for route, regex := range githubRouteMatchers { + if regex.MatchString(path) { + return route + } } - m.APIDuration. - WithLabelValues("/repos/{owner}/{repo}/contents/{path}", status). - Observe(elapsed.Seconds()) + return "unknown_route" } diff --git a/pkg/querier/vcs/client/metrics_test.go b/pkg/querier/vcs/client/metrics_test.go new file mode 100644 index 0000000000..5ce0de6b32 --- /dev/null +++ b/pkg/querier/vcs/client/metrics_test.go @@ -0,0 +1,78 @@ +package client + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func Test_matchGitHubAPIRoute(t *testing.T) { + tests := []struct { + Name string + Path string + Want string + }{ + { + Name: "GetContents", + Path: "/repos/grafana/pyroscope/contents/pkg/querier/querier.go", + Want: "/repos/{owner}/{repo}/contents/{path}", + }, + { + Name: "GetContents with dash", + Path: "/repos/connectrpc/connect-go/contents/protocol.go", + Want: "/repos/{owner}/{repo}/contents/{path}", + }, + { + Name: "GetContents without path", + Path: "/repos/grafana/pyroscope/contents/", + Want: "unknown_route", + }, + { + Name: "GetContents with whitespace in path", + Path: "/repos/grafana/pyroscope/contents/path with spaces", + Want: "unknown_route", + }, + { + Name: "GetCommit", + Path: "/repos/grafana/pyroscope/commits/abcdef1234567890", + Want: "/repos/{owner}/{repo}/commits/{ref}", + }, + { + Name: "GetCommit with lowercase and uppercase ref", + Path: "/repos/grafana/pyroscope/commits/abcdefABCDEF1234567890", + Want: "/repos/{owner}/{repo}/commits/{ref}", + }, + { + Name: "GetCommit with non-hexadecimal ref", + Path: "/repos/grafana/pyroscope/commits/HEAD", + Want: "/repos/{owner}/{repo}/commits/{ref}", + }, + { + Name: "GetCommit without commit", + Path: "/repos/grafana/pyroscope/commits/", + Want: "unknown_route", + }, + { + Name: "Refresh", + Path: "/login/oauth/access_token", + Want: "/login/oauth/access_token", + }, + { + Name: "empty path", + Path: "", + Want: "unknown_route", + }, + { + Name: "unmapped path", + Path: "/some/random/path", + Want: "unknown_route", + }, + } + + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + got := matchGitHubAPIRoute(tt.Path) + require.Equal(t, tt.Want, got) + }) + } +} diff --git a/pkg/querier/vcs/github.go b/pkg/querier/vcs/github.go index b256a11fd7..1b3599a36a 100644 --- a/pkg/querier/vcs/github.go +++ b/pkg/querier/vcs/github.go @@ -65,10 +65,7 @@ func githubOAuthConfig() (*oauth2.Config, error) { // refreshGithubToken sends a request configured for the GitHub API and marshals // the response into a githubAuthToken. -func refreshGithubToken(req *http.Request) (*githubAuthToken, error) { - client := http.Client{ - Timeout: 10 * time.Second, - } +func refreshGithubToken(req *http.Request, client *http.Client) (*githubAuthToken, error) { res, err := client.Do(req) if err != nil { return nil, fmt.Errorf("failed to make request: %w", err) diff --git a/pkg/querier/vcs/github_test.go b/pkg/querier/vcs/github_test.go index 9fecf16dfd..0354507c5c 100644 --- a/pkg/querier/vcs/github_test.go +++ b/pkg/querier/vcs/github_test.go @@ -66,7 +66,7 @@ func Test_refreshGithubToken(t *testing.T) { req, err := http.NewRequest("POST", fakeGithubAPI.URL, nil) require.NoError(t, err) - got, err := refreshGithubToken(req) + got, err := refreshGithubToken(req, http.DefaultClient) require.NoError(t, err) require.Equal(t, want, *got) } diff --git a/pkg/querier/vcs/service.go b/pkg/querier/vcs/service.go index f2d72fb636..b03bba05b4 100644 --- a/pkg/querier/vcs/service.go +++ b/pkg/querier/vcs/service.go @@ -23,14 +23,16 @@ import ( var _ vcsv1connect.VCSServiceHandler = (*Service)(nil) type Service struct { - logger log.Logger - metrics client.Metrics + logger log.Logger + httpClient *http.Client } func New(logger log.Logger, reg prometheus.Registerer) *Service { + httpClient := client.InstrumentedHTTPClient(logger, reg) + return &Service{ - logger: logger, - metrics: client.NewMetrics(reg), + logger: logger, + httpClient: httpClient, } } @@ -47,9 +49,7 @@ func (q *Service) GithubLogin(ctx context.Context, req *connect.Request[vcsv1.Gi return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("failed to authorize with GitHub")) } - start := time.Now() token, err := cfg.Exchange(ctx, req.Msg.AuthorizationCode) - q.metrics.LoginObserve(time.Since(start), err) if err != nil { q.logger.Log("err", err, "msg", "failed to exchange authorization code with GitHub") return nil, connect.NewError(connect.CodeUnauthenticated, fmt.Errorf("failed to authorize with GitHub")) @@ -80,9 +80,7 @@ func (q *Service) GithubRefresh(ctx context.Context, req *connect.Request[vcsv1. return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("failed to refresh token")) } - start := time.Now() - githubToken, err := refreshGithubToken(githubRequest) - q.metrics.RefreshObserve(time.Since(start), err) + githubToken, err := refreshGithubToken(githubRequest, q.httpClient) if err != nil { q.logger.Log("err", err, "msg", "failed to refresh token with GitHub") return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("failed to refresh token")) @@ -125,7 +123,7 @@ func (q *Service) GetFile(ctx context.Context, req *connect.Request[vcsv1.GetFil } // todo: we can support multiple provider: bitbucket, gitlab, etc. - ghClient, err := client.GithubClient(ctx, token, q.metrics) + ghClient, err := client.GithubClient(ctx, token, q.httpClient) if err != nil { return nil, err } @@ -168,7 +166,7 @@ func (q *Service) GetCommit(ctx context.Context, req *connect.Request[vcsv1.GetC return nil, connect.NewError(connect.CodeInvalidArgument, errors.New("only GitHub repositories are supported")) } - ghClient, err := client.GithubClient(ctx, token, q.metrics) + ghClient, err := client.GithubClient(ctx, token, q.httpClient) if err != nil { return nil, err }