Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v10.0.x] Alerting: Add heuristics back to datasource healthchecks #69541

Merged
merged 2 commits into from
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
35 changes: 26 additions & 9 deletions pkg/tsdb/prometheus/healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ import (
"fmt"
"time"

"github.com/grafana/grafana-plugin-sdk-go/backend"
"github.com/grafana/kindsys"

"github.com/grafana/grafana-plugin-sdk-go/backend"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/tsdb/prometheus/kinds/dataquery"
"github.com/grafana/grafana/pkg/tsdb/prometheus/models"
Expand All @@ -28,14 +28,32 @@ func (s *Service) CheckHealth(ctx context.Context, req *backend.CheckHealthReque

// check that the datasource exists
if err != nil {
return getHealthCheckMessage(logger, "error getting datasource info", err)
return getHealthCheckMessage("error getting datasource info", err)
}

if ds == nil {
return getHealthCheckMessage(logger, "", errors.New("invalid datasource info received"))
return getHealthCheckMessage("", errors.New("invalid datasource info received"))
}

hc, err := healthcheck(ctx, req, ds)
if err != nil {
logger.Warn("error performing prometheus healthcheck", "err", err.Error())
return nil, err
}

heuristics, err := getHeuristics(ctx, ds)
if err != nil {
logger.Warn("failed to get prometheus heuristics", "err", err.Error())
} else {
jsonDetails, err := json.Marshal(heuristics)
if err != nil {
logger.Warn("failed to marshal heuristics", "err", err)
} else {
hc.JSONDetails = jsonDetails
}
}

return healthcheck(ctx, req, ds)
return hc, nil
}

func healthcheck(ctx context.Context, req *backend.CheckHealthRequest, i *instance) (*backend.CheckHealthResult, error) {
Expand Down Expand Up @@ -64,26 +82,25 @@ func healthcheck(ctx context.Context, req *backend.CheckHealthRequest, i *instan
})

if err != nil {
return getHealthCheckMessage(logger, "There was an error returned querying the Prometheus API.", err)
return getHealthCheckMessage("There was an error returned querying the Prometheus API.", err)
}

if resp.Responses[refID].Error != nil {
return getHealthCheckMessage(logger, "There was an error returned querying the Prometheus API.",
return getHealthCheckMessage("There was an error returned querying the Prometheus API.",
errors.New(resp.Responses[refID].Error.Error()))
}

return getHealthCheckMessage(logger, "Successfully queried the Prometheus API.", nil)
return getHealthCheckMessage("Successfully queried the Prometheus API.", nil)
}

func getHealthCheckMessage(logger log.Logger, message string, err error) (*backend.CheckHealthResult, error) {
func getHealthCheckMessage(message string, err error) (*backend.CheckHealthResult, error) {
if err == nil {
return &backend.CheckHealthResult{
Status: backend.HealthStatusOk,
Message: message,
}, nil
}

logger.Warn("error performing prometheus healthcheck", "err", err.Error())
errorMessage := fmt.Sprintf("%s - %s", err.Error(), message)

return &backend.CheckHealthResult{
Expand Down
112 changes: 112 additions & 0 deletions pkg/tsdb/prometheus/heuristics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package prometheus

import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"

"github.com/grafana/grafana-plugin-sdk-go/backend"
)

// KindPrometheus is the application name getHeuristics reports when the
// build info response carries no features.
const KindPrometheus = "Prometheus"

// KindMimir is the application name getHeuristics reports when the build
// info response carries at least one feature.
const KindMimir = "Mimir"

// ErrNoBuildInfo is returned by getBuildInfo when the build info endpoint
// answers with HTTP 404 Not Found.
var ErrNoBuildInfo = errors.New("no build info")

// BuildInfoRequest is the input to Service.GetBuildInfo.
type BuildInfoRequest struct {
	// PluginContext is handed to Service.getInstance to resolve the
	// datasource instance whose build info is requested.
	PluginContext backend.PluginContext
}

// Build info payload types. getBuildInfo decodes the JSON body returned by
// the api/v1/status/buildinfo resource call into these structs.
type (
	// BuildInfoResponse is the top-level envelope of a build info reply.
	BuildInfoResponse struct {
		Status string                `json:"status"`
		Data   BuildInfoResponseData `json:"data"`
	}

	// BuildInfoResponseData is the "data" object of a build info reply.
	// getHeuristics only inspects Features: an empty map is classified as
	// Prometheus, a non-empty one as Mimir.
	BuildInfoResponseData struct {
		Version   string            `json:"version"`
		Revision  string            `json:"revision"`
		Branch    string            `json:"branch"`
		Features  map[string]string `json:"features"`
		BuildUser string            `json:"buildUser"`
		BuildDate string            `json:"buildDate"`
		GoVersion string            `json:"goVersion"`
	}
)

// GetBuildInfo resolves the datasource instance for req.PluginContext and
// returns the build info fetched through that instance. An error from the
// instance lookup is returned unchanged, with a nil response.
func (s *Service) GetBuildInfo(ctx context.Context, req BuildInfoRequest) (*BuildInfoResponse, error) {
	inst, lookupErr := s.getInstance(req.PluginContext)
	if lookupErr != nil {
		return nil, lookupErr
	}

	return getBuildInfo(ctx, inst)
}

// getBuildInfo queries /api/v1/status/buildinfo through the instance's
// resource executor and decodes the JSON body.
//
// Outcomes:
//   - an error from the resource call is returned as-is;
//   - HTTP 404 yields ErrNoBuildInfo;
//   - any other non-200 status yields an "unexpected response" error;
//   - a body that is not valid JSON yields a wrapped unmarshal error.
func getBuildInfo(ctx context.Context, i *instance) (*BuildInfoResponse, error) {
	request := &backend.CallResourceRequest{Path: "api/v1/status/buildinfo"}

	resp, err := i.resource.Execute(ctx, request)
	if err != nil {
		return nil, err
	}

	switch resp.Status {
	case http.StatusOK:
		// Success: fall out of the switch and decode the body below.
	case http.StatusNotFound:
		return nil, ErrNoBuildInfo
	default:
		return nil, fmt.Errorf("unexpected response %d", resp.Status)
	}

	info := new(BuildInfoResponse)
	if err := json.Unmarshal(resp.Body, info); err != nil {
		return nil, fmt.Errorf("failed to unmarshal JSON: %w", err)
	}
	return info, nil
}

// HeuristicsRequest is the input to Service.GetHeuristics.
type HeuristicsRequest struct {
	// PluginContext is handed to Service.getInstance to resolve the
	// datasource instance the heuristics are computed for.
	PluginContext backend.PluginContext
}

// Heuristics result types, serialisable to JSON.
type (
	// Heuristics describes what getHeuristics concluded about a datasource.
	// Application is one of KindPrometheus or KindMimir on success.
	Heuristics struct {
		Application string   `json:"application"`
		Features    Features `json:"features"`
	}

	// Features lists the capability flags attached to a Heuristics value.
	Features struct {
		RulerApiEnabled bool `json:"rulerApiEnabled"`
	}
)

// GetHeuristics resolves the datasource instance for req.PluginContext and
// returns the heuristics computed for that instance. An error from the
// instance lookup is returned unchanged, with a nil result.
func (s *Service) GetHeuristics(ctx context.Context, req HeuristicsRequest) (*Heuristics, error) {
	inst, lookupErr := s.getInstance(req.PluginContext)
	if lookupErr != nil {
		return nil, lookupErr
	}

	return getHeuristics(ctx, inst)
}

// getHeuristics classifies the datasource behind i from its build info.
//
// A build info response without features is reported as KindPrometheus with
// the ruler API disabled; a response with any features is reported as
// KindMimir with the ruler API enabled. When the build info cannot be
// fetched, the failure is logged as a warning and returned wrapped, and no
// Heuristics value is returned.
func getHeuristics(ctx context.Context, i *instance) (*Heuristics, error) {
	info, err := getBuildInfo(ctx, i)
	if err != nil {
		logger.Warn("failed to get prometheus buildinfo", "err", err.Error())
		return nil, fmt.Errorf("failed to get buildinfo: %w", err)
	}

	// Placeholder application name; always overwritten by the branch below.
	result := &Heuristics{Application: "unknown"}
	if hasFeatures := len(info.Data.Features) > 0; hasFeatures {
		result.Application = KindMimir
		result.Features.RulerApiEnabled = true
	} else {
		// If there are no features then this is a Prometheus datasource
		result.Application = KindPrometheus
		result.Features.RulerApiEnabled = false
	}
	return result, nil
}
98 changes: 98 additions & 0 deletions pkg/tsdb/prometheus/heuristics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package prometheus

import (
"context"
"io"
"net/http"
"strconv"
"strings"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/grafana/grafana-plugin-sdk-go/backend/datasource"
sdkHttpClient "github.com/grafana/grafana-plugin-sdk-go/backend/httpclient"
"github.com/grafana/grafana/pkg/infra/httpclient"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/setting"
)

// heuristicsProvider is a test double passed where an HTTP client provider
// is expected. It embeds httpclient.Provider and an http.RoundTripper; its
// New and GetTransport methods hand back the embedded RoundTripper.
type heuristicsProvider struct {
	httpclient.Provider
	http.RoundTripper
}

// heuristicsSuccessRoundTripper is an http.RoundTripper stub that answers
// every request with a fixed status code and body instead of performing a
// real round trip.
type heuristicsSuccessRoundTripper struct {
	res    io.ReadCloser // body handed back on every response
	status int           // HTTP status code of every response
}

// RoundTrip builds the canned response for req. It never fails: the returned
// error is always nil. The same rt.res reader is attached to every response,
// so its content can only be consumed once.
func (rt *heuristicsSuccessRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
	canned := new(http.Response)
	canned.StatusCode = rt.status
	canned.Status = strconv.Itoa(rt.status)
	canned.Body = rt.res
	canned.Request = req
	// Header and ContentLength are intentionally left at their zero values.
	return canned, nil
}

// New returns a fresh *http.Client whose Transport is the provider's
// RoundTripper. The supplied options are ignored and the error is always nil.
func (provider *heuristicsProvider) New(opts ...sdkHttpClient.Options) (*http.Client, error) {
	return &http.Client{Transport: provider.RoundTripper}, nil
}

// GetTransport returns the provider's RoundTripper as the transport. The
// supplied options are ignored and the error is always nil.
func (provider *heuristicsProvider) GetTransport(opts ...sdkHttpClient.Options) (http.RoundTripper, error) {
	transport := provider.RoundTripper
	return transport, nil
}

// getHeuristicsMockProvider wraps rt in a heuristicsProvider so it can be
// passed wherever the tests need an HTTP client provider. The embedded
// httpclient.Provider is left unset.
func getHeuristicsMockProvider(rt http.RoundTripper) *heuristicsProvider {
	provider := new(heuristicsProvider)
	provider.RoundTripper = rt
	return provider
}

func Test_GetHeuristics(t *testing.T) {
t.Run("should return Prometheus", func(t *testing.T) {
rt := heuristicsSuccessRoundTripper{
res: io.NopCloser(strings.NewReader("{\"status\":\"success\",\"data\":{\"version\":\"1.0\"}}")),
status: http.StatusOK,
}
httpProvider := getHeuristicsMockProvider(&rt)
s := &Service{
im: datasource.NewInstanceManager(newInstanceSettings(httpProvider, &setting.Cfg{}, &featuremgmt.FeatureManager{}, nil)),
}

req := HeuristicsRequest{
PluginContext: getPluginContext(),
}
res, err := s.GetHeuristics(context.Background(), req)
assert.NoError(t, err)
require.NotNil(t, res)
assert.Equal(t, KindPrometheus, res.Application)
assert.Equal(t, Features{RulerApiEnabled: false}, res.Features)
})

t.Run("should return Mimir", func(t *testing.T) {
rt := heuristicsSuccessRoundTripper{
res: io.NopCloser(strings.NewReader("{\"status\":\"success\",\"data\":{\"features\":{\"foo\":\"bar\"},\"version\":\"1.0\"}}")),
status: http.StatusOK,
}
httpProvider := getHeuristicsMockProvider(&rt)
s := &Service{
im: datasource.NewInstanceManager(newInstanceSettings(httpProvider, &setting.Cfg{}, &featuremgmt.FeatureManager{}, nil)),
}

req := HeuristicsRequest{
PluginContext: getPluginContext(),
}
res, err := s.GetHeuristics(context.Background(), req)
assert.NoError(t, err)
require.NotNil(t, res)
assert.Equal(t, KindMimir, res.Application)
assert.Equal(t, Features{RulerApiEnabled: true}, res.Features)
})
}