diff --git a/pkg/clientpool/ingester_client_pool.go b/pkg/clientpool/ingester_client_pool.go index 84e9f8bd49..e5511fea5b 100644 --- a/pkg/clientpool/ingester_client_pool.go +++ b/pkg/clientpool/ingester_client_pool.go @@ -59,8 +59,10 @@ func (f *ingesterPoolFactory) FromInstance(inst ring.InstanceDesc) (ring_client. if err != nil { return nil, err } + + httpClient := util.InstrumentedDefaultHTTPClient(util.WithTracingTransport()) return &ingesterPoolClient{ - IngesterServiceClient: ingesterv1connect.NewIngesterServiceClient(util.InstrumentedHTTPClient(), "http://"+inst.Addr, f.options...), + IngesterServiceClient: ingesterv1connect.NewIngesterServiceClient(httpClient, "http://"+inst.Addr, f.options...), HealthClient: grpc_health_v1.NewHealthClient(conn), Closer: conn, }, nil diff --git a/pkg/clientpool/store_gateway_client_pool.go b/pkg/clientpool/store_gateway_client_pool.go index 50b0f59416..617f10afdc 100644 --- a/pkg/clientpool/store_gateway_client_pool.go +++ b/pkg/clientpool/store_gateway_client_pool.go @@ -44,8 +44,10 @@ func (f *storeGatewayPoolFactory) FromInstance(inst ring.InstanceDesc) (ring_cli if err != nil { return nil, err } + + httpClient := util.InstrumentedDefaultHTTPClient(util.WithTracingTransport()) return &storeGatewayPoolClient{ - StoreGatewayServiceClient: storegatewayv1connect.NewStoreGatewayServiceClient(util.InstrumentedHTTPClient(), "http://"+inst.Addr, f.options...), + StoreGatewayServiceClient: storegatewayv1connect.NewStoreGatewayServiceClient(httpClient, "http://"+inst.Addr, f.options...), HealthClient: grpc_health_v1.NewHealthClient(conn), Closer: conn, }, nil diff --git a/pkg/querier/querier.go b/pkg/querier/querier.go index 2012b078ea..9aa54f294a 100644 --- a/pkg/querier/querier.go +++ b/pkg/querier/querier.go @@ -131,7 +131,7 @@ func New(params *NewQuerierParams) (*Querier, error) { params.IngestersRing, ), storeGatewayQuerier: storeGatewayQuerier, - VCSServiceHandler: vcs.New(params.Logger), + VCSServiceHandler: vcs.New(params.Logger, params.Reg), storageBucket: params.StorageBucket, tenantConfigProvider: params.CfgProvider, limits: params.Overrides, diff --git a/pkg/querier/vcs/client/github.go b/pkg/querier/vcs/client/github.go index 263bc6d92a..1ccf9578a7 100644 --- a/pkg/querier/vcs/client/github.go +++ b/pkg/querier/vcs/client/github.go @@ -15,9 +15,9 @@ import ( ) // GithubClient returns a github client. -func GithubClient(ctx context.Context, token *oauth2.Token) (*githubClient, error) { +func GithubClient(ctx context.Context, token *oauth2.Token, client *http.Client) (*githubClient, error) { return &githubClient{ - client: github.NewClient(nil).WithAuthToken(token.AccessToken), + client: github.NewClient(client).WithAuthToken(token.AccessToken), }, nil } @@ -30,6 +30,7 @@ func (gh *githubClient) GetCommit(ctx context.Context, owner, repo, ref string) if err != nil { return nil, err } + return &vcsv1.GetCommitResponse{ Sha: toString(commit.SHA), Message: toString(commit.Commit.Message), @@ -45,25 +46,30 @@ func (gh *githubClient) GetFile(ctx context.Context, req FileRequest) (File, err // We could abstract away git provider using git protocol // git clone https://x-access-token:@github.com/owner/repo.git // For now we use the github client. + file, _, _, err := gh.client.Repositories.GetContents(ctx, req.Owner, req.Repo, req.Path, &github.RepositoryContentGetOptions{Ref: req.Ref}) if err != nil { var githubErr *github.ErrorResponse - if ok := errors.As(err, &githubErr); ok && githubErr.Response.StatusCode == http.StatusNotFound { + if errors.As(err, &githubErr) && githubErr.Response.StatusCode == http.StatusNotFound { return File{}, fmt.Errorf("%w: %s", ErrNotFound, err) } return File{}, err } + if file == nil { return File{}, ErrNotFound } + // We only support files retrieval. if file.Type != nil && *file.Type != "file" { return File{}, connect.NewError(connect.CodeInvalidArgument, errors.New("path is not a file")) } + content, err := file.GetContent() if err != nil { return File{}, err } + return File{ Content: content, URL: toString(file.HTMLURL), diff --git a/pkg/querier/vcs/client/metrics.go b/pkg/querier/vcs/client/metrics.go new file mode 100644 index 0000000000..f8b28c95ef --- /dev/null +++ b/pkg/querier/vcs/client/metrics.go @@ -0,0 +1,84 @@ +package client + +import ( + "fmt" + "net/http" + "regexp" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/grafana/pyroscope/pkg/util" +) + +var ( + githubRouteMatchers = map[string]*regexp.Regexp{ + // Get repository contents. + // https://docs.github.com/en/rest/repos/contents?apiVersion=2022-11-28#get-repository-content + "/repos/{owner}/{repo}/contents/{path}": regexp.MustCompile(`^\/repos\/\S+\/\S+\/contents\/\S+$`), + + // Get a commit. + // https://docs.github.com/en/rest/commits/commits?apiVersion=2022-11-28#get-a-commit + "/repos/{owner}/{repo}/commits/{ref}": regexp.MustCompile(`^\/repos\/\S+\/\S+\/commits\/\S+$`), + + // Refresh auth token. + // https://docs.github.com/en/apps/creating-github-apps/authenticating-with-a-github-app/refreshing-user-access-tokens#refreshing-a-user-access-token-with-a-refresh-token + "/login/oauth/access_token": regexp.MustCompile(`^\/login\/oauth\/access_token$`), + } +) + +func InstrumentedHTTPClient(logger log.Logger, reg prometheus.Registerer) *http.Client { + apiDuration := promauto.With(reg).NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "pyroscope", + Name: "vcs_github_request_duration", + Help: "Duration of GitHub API requests in seconds", + Buckets: prometheus.ExponentialBucketsRange(0.1, 10, 8), + }, + []string{"method", "route", "status_code"}, + ) + + defaultClient := &http.Client{ + Timeout: 10 * time.Second, + Transport: http.DefaultTransport, + } + client := util.InstrumentedHTTPClient(defaultClient, withGitHubMetricsTransport(logger, apiDuration)) + return client +} + +// withGitHubMetricsTransport wraps a transport with a client to track GitHub +// API usage. +func withGitHubMetricsTransport(logger log.Logger, hv *prometheus.HistogramVec) util.RoundTripperInstrumentFunc { + return func(next http.RoundTripper) http.RoundTripper { + return util.RoundTripperFunc(func(req *http.Request) (*http.Response, error) { + route := matchGitHubAPIRoute(req.URL.Path) + statusCode := "" + start := time.Now() + + res, err := next.RoundTrip(req) + if err == nil { + statusCode = fmt.Sprintf("%d", res.StatusCode) + } + + if route == "unknown_route" { + level.Warn(logger).Log("path", req.URL.Path, "msg", "unknown GitHub API route") + } + hv.WithLabelValues(req.Method, route, statusCode).Observe(time.Since(start).Seconds()) + + return res, err + }) + } +} + +func matchGitHubAPIRoute(path string) string { + for route, regex := range githubRouteMatchers { + if regex.MatchString(path) { + return route + } + } + + return "unknown_route" +} diff --git a/pkg/querier/vcs/client/metrics_test.go b/pkg/querier/vcs/client/metrics_test.go new file mode 100644 index 0000000000..5ce0de6b32 --- /dev/null +++ b/pkg/querier/vcs/client/metrics_test.go @@ -0,0 +1,78 @@ +package client + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func Test_matchGitHubAPIRoute(t *testing.T) { + tests := []struct { + Name string + Path string + Want string + }{ + { + Name: "GetContents", + Path: "/repos/grafana/pyroscope/contents/pkg/querier/querier.go", + Want: "/repos/{owner}/{repo}/contents/{path}", + }, + { + Name: "GetContents with dash", + Path: "/repos/connectrpc/connect-go/contents/protocol.go", + Want: "/repos/{owner}/{repo}/contents/{path}", + }, + { + Name: "GetContents without path", + Path: "/repos/grafana/pyroscope/contents/", + Want: "unknown_route", + }, + { + Name: "GetContents with whitespace in path", + Path: "/repos/grafana/pyroscope/contents/path with spaces", + Want: "unknown_route", + }, + { + Name: "GetCommit", + Path: "/repos/grafana/pyroscope/commits/abcdef1234567890", + Want: "/repos/{owner}/{repo}/commits/{ref}", + }, + { + Name: "GetCommit with lowercase and uppercase ref", + Path: "/repos/grafana/pyroscope/commits/abcdefABCDEF1234567890", + Want: "/repos/{owner}/{repo}/commits/{ref}", + }, + { + Name: "GetCommit with non-hexadecimal ref", + Path: "/repos/grafana/pyroscope/commits/HEAD", + Want: "/repos/{owner}/{repo}/commits/{ref}", + }, + { + Name: "GetCommit without commit", + Path: "/repos/grafana/pyroscope/commits/", + Want: "unknown_route", + }, + { + Name: "Refresh", + Path: "/login/oauth/access_token", + Want: "/login/oauth/access_token", + }, + { + Name: "empty path", + Path: "", + Want: "unknown_route", + }, + { + Name: "unmapped path", + Path: "/some/random/path", + Want: "unknown_route", + }, + } + + for _, tt := range tests { + t.Run(tt.Name, func(t *testing.T) { + got := matchGitHubAPIRoute(tt.Path) + require.Equal(t, tt.Want, got) + }) + } +} diff --git a/pkg/querier/vcs/github.go b/pkg/querier/vcs/github.go index b256a11fd7..1b3599a36a 100644 --- a/pkg/querier/vcs/github.go +++ b/pkg/querier/vcs/github.go @@ -65,10 +65,7 @@ func githubOAuthConfig() (*oauth2.Config, error) { // refreshGithubToken sends a request configured for the GitHub API and marshals // the response into a githubAuthToken. -func refreshGithubToken(req *http.Request) (*githubAuthToken, error) { - client := http.Client{ - Timeout: 10 * time.Second, - } +func refreshGithubToken(req *http.Request, client *http.Client) (*githubAuthToken, error) { res, err := client.Do(req) if err != nil { return nil, fmt.Errorf("failed to make request: %w", err) diff --git a/pkg/querier/vcs/github.md b/pkg/querier/vcs/github.md deleted file mode 100644 index 6361da61ab..0000000000 --- a/pkg/querier/vcs/github.md +++ /dev/null @@ -1,41 +0,0 @@ -# GitHub Integration - -The following diagram shows the flow of data when fetching source code from GitHub. - -```mermaid -sequenceDiagram - autonumber - actor User - participant Client - participant Pyroscope - participant GitHub - User->>Client: Select a Flame graph Node - Client->>Pyroscope: Request Flame graph Data For Node - Pyroscope-->>Client: Flame graph Data - Client-->Client: if the data has source code information - Client->>Pyroscope: Request Github App client ID - Pyroscope-->>Client: Github App client ID - Client->>GitHub: Redirect for Authorization of Pyroscope App - GitHub-->>Client: Send Authorization Code - Client->>Pyroscope: Login to GitHub with Authorization Code - Pyroscope->>GitHub: Request Access Token - GitHub-->>Pyroscope: Send OAuth token - Pyroscope-->>Client: Set OAuth token in cookie - User-->>Client: Select a git version - Client->>Pyroscope: Request code for given git version, file path and repo - Pyroscope->>GitHub: Fetch code - GitHub-->>Pyroscope: Send code - Pyroscope-->>Client: Send code - Client-->>Client: Render code and map line number -``` - -## GitHub App Authorization - -Pyroscope uses GitHub OAuth App to fetch source code for a given git version, file path and repo. This is done by using the [GitHub OAuth App](https://docs.github.com/en/developers/apps/authorizing-oauth-apps) to get an OAuth token for the user. This token is then used to fetch the source code from GitHub. - -Pyroscope supports a new API to fetch Github App client ID. This API is used to get the client ID for the GitHub OAuth App. This client ID is then used to redirect the user to GitHub to authorize Pyroscope to access the user's GitHub account. -Github will redirect the user back to the client with an authorization code. This authorization code is then used to get an OAuth token from GitHub via Pyroscope. - -The OAuth token is never stored by Pyroscope. It is only stored in the user's browser as a cookie. Only Pyroscope can access this cookie as it is encrypted with a secret key that is only known to Pyroscope. - -When requesting a file to Pyroscope and it returns a `401 Unauthorized` response, it means that the OAuth token has expired. At the same, time the user's browser will delete the cookie containing the OAuth token. The user will then have to log in again to GitHub to get a new OAuth token. diff --git a/pkg/querier/vcs/github_test.go b/pkg/querier/vcs/github_test.go index 9fecf16dfd..0354507c5c 100644 --- a/pkg/querier/vcs/github_test.go +++ b/pkg/querier/vcs/github_test.go @@ -66,7 +66,7 @@ func Test_refreshGithubToken(t *testing.T) { req, err := http.NewRequest("POST", fakeGithubAPI.URL, nil) require.NoError(t, err) - got, err := refreshGithubToken(req) + got, err := refreshGithubToken(req, http.DefaultClient) require.NoError(t, err) require.Equal(t, want, *got) } diff --git a/pkg/querier/vcs/service.go b/pkg/querier/vcs/service.go index 1b90ce608f..c7daef6a8c 100644 --- a/pkg/querier/vcs/service.go +++ b/pkg/querier/vcs/service.go @@ -11,6 +11,7 @@ import ( "github.com/go-kit/log" giturl "github.com/kubescape/go-git-url" "github.com/kubescape/go-git-url/apis" + "github.com/prometheus/client_golang/prometheus" "golang.org/x/oauth2" vcsv1 "github.com/grafana/pyroscope/api/gen/proto/go/vcs/v1" @@ -22,12 +23,16 @@ import ( var _ vcsv1connect.VCSServiceHandler = (*Service)(nil) type Service struct { - logger log.Logger + logger log.Logger + httpClient *http.Client } -func New(logger log.Logger) *Service { +func New(logger log.Logger, reg prometheus.Registerer) *Service { + httpClient := client.InstrumentedHTTPClient(logger, reg) + return &Service{ - logger: logger, + logger: logger, + httpClient: httpClient, } } @@ -81,7 +86,7 @@ func (q *Service) GithubRefresh(ctx context.Context, req *connect.Request[vcsv1. return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("failed to refresh token")) } - githubToken, err := refreshGithubToken(githubRequest) + githubToken, err := refreshGithubToken(githubRequest, q.httpClient) if err != nil { q.logger.Log("err", err, "msg", "failed to refresh token with GitHub") return nil, connect.NewError(connect.CodeInternal, fmt.Errorf("failed to refresh token")) @@ -130,7 +135,7 @@ func (q *Service) GetFile(ctx context.Context, req *connect.Request[vcsv1.GetFil } // todo: we can support multiple provider: bitbucket, gitlab, etc. - ghClient, err := client.GithubClient(ctx, token) + ghClient, err := client.GithubClient(ctx, token, q.httpClient) if err != nil { return nil, err } @@ -173,7 +178,7 @@ func (q *Service) GetCommit(ctx context.Context, req *connect.Request[vcsv1.GetC return nil, connect.NewError(connect.CodeInvalidArgument, errors.New("only GitHub repositories are supported")) } - ghClient, err := client.GithubClient(ctx, token) + ghClient, err := client.GithubClient(ctx, token, q.httpClient) if err != nil { return nil, err } diff --git a/pkg/storegateway/clientpool/client_pool.go b/pkg/storegateway/clientpool/client_pool.go index 3e9973f783..d8400eb478 100644 --- a/pkg/storegateway/clientpool/client_pool.go +++ b/pkg/storegateway/clientpool/client_pool.go @@ -66,8 +66,10 @@ func (f *poolFactory) FromInstance(inst ring.InstanceDesc) (ring_client.PoolClie if err != nil { return nil, err } + + httpClient := util.InstrumentedDefaultHTTPClient(util.WithTracingTransport()) return &storeGatewayPoolClient{ - StoreGatewayServiceClient: storegatewayv1connect.NewStoreGatewayServiceClient(util.InstrumentedHTTPClient(), "http://"+inst.Addr, f.options...), + StoreGatewayServiceClient: storegatewayv1connect.NewStoreGatewayServiceClient(httpClient, "http://"+inst.Addr, f.options...), HealthClient: grpc_health_v1.NewHealthClient(conn), Closer: conn, }, nil diff --git a/pkg/util/http.go b/pkg/util/http.go index 606f97684b..43920d4798 100644 --- a/pkg/util/http.go +++ b/pkg/util/http.go @@ -50,20 +50,37 @@ func (f RoundTripperFunc) RoundTrip(req *http.Request) (*http.Response, error) { return f(req) } -// InstrumentedHTTPClient returns a HTTP client with tracing instrumented default RoundTripper. -func InstrumentedHTTPClient() *http.Client { - return &http.Client{ - Transport: WrapWithInstrumentedHTTPTransport(defaultTransport), +type RoundTripperInstrumentFunc func(next http.RoundTripper) http.RoundTripper + +// InstrumentedDefaultHTTPClient returns an http client configured with some +// default settings which is wrapped with a variety of instrumented +// RoundTrippers. +func InstrumentedDefaultHTTPClient(instruments ...RoundTripperInstrumentFunc) *http.Client { + client := &http.Client{ + Transport: defaultTransport, } + return InstrumentedHTTPClient(client, instruments...) } -// WrapWithInstrumentedHTTPTransport wraps the given RoundTripper with an tracing instrumented one. -func WrapWithInstrumentedHTTPTransport(next http.RoundTripper) http.RoundTripper { - next = &nethttp.Transport{RoundTripper: next} - return RoundTripperFunc(func(req *http.Request) (*http.Response, error) { - req = nethttp.TraceRequest(opentracing.GlobalTracer(), req) - return next.RoundTrip(req) - }) +// InstrumentedHTTPClient adds the associated instrumentation middlewares to the +// provided http client. +func InstrumentedHTTPClient(client *http.Client, instruments ...RoundTripperInstrumentFunc) *http.Client { + for i := len(instruments) - 1; i >= 0; i-- { + client.Transport = instruments[i](client.Transport) + } + return client +} + +// WithTracingTransport wraps the given RoundTripper with a tracing instrumented +// one. +func WithTracingTransport() RoundTripperInstrumentFunc { + return func(next http.RoundTripper) http.RoundTripper { + next = &nethttp.Transport{RoundTripper: next} + return RoundTripperFunc(func(req *http.Request) (*http.Response, error) { + req = nethttp.TraceRequest(opentracing.GlobalTracer(), req) + return next.RoundTrip(req) + }) + } } // WriteYAMLResponse writes some YAML as a HTTP response.