Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sampling: fix pubsub implementation #5126

Merged
merged 14 commits into from May 26, 2021
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions apmpackage/apm/0.2.0/data_stream/sampled_traces/manifest.yml
Expand Up @@ -2,3 +2,10 @@ title: APM tail-sampled traces
type: traces
dataset: sampled
ilm_policy: traces-apm.sampled-default_policy
elasticsearch:
index_template:
settings:
# Create a single shard per index, so we can use
# global checkpoints as a way of limiting search
# results.
number_of_shards: 1
1 change: 1 addition & 0 deletions changelogs/head.asciidoc
Expand Up @@ -9,6 +9,7 @@ https://github.com/elastic/apm-server/compare/7.13\...master[View commits]
[float]
==== Bug fixes
* Fix panic due to misaligned 64-bit access on 32-bit architectures {pull}5277[5277]
* Fixed tail-based sampling pubsub to use _seq_no correctly {pull}5126[5126]

[float]
==== Intake API Changes
Expand Down
86 changes: 86 additions & 0 deletions x-pack/apm-server/sampling/pubsub/checkpoints.go
@@ -0,0 +1,86 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package pubsub

import (
"context"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"

"github.com/pkg/errors"

"github.com/elastic/go-elasticsearch/v7/esapi"

"github.com/elastic/apm-server/elasticsearch"
)

// getGlobalCheckpoints returns the current global checkpoint for each index
// underlying dataStream. Each index is required to have a single (primary) shard.
func getGlobalCheckpoints(
ctx context.Context,
client elasticsearch.Client,
dataStream string,
) (map[string]int64, error) {
indexGlobalCheckpoints := make(map[string]int64)
resp, err := esapi.IndicesStatsRequest{
axw marked this conversation as resolved.
Show resolved Hide resolved
Index: []string{dataStream},
Level: "shards",
// By default all metrics are returned; query just the "get" metric,
// which is very cheap.
Metric: []string{"get"},
}.Do(ctx, client)
if err != nil {
return nil, errors.New("index stats request failed")
}
defer resp.Body.Close()
if resp.IsError() {
switch resp.StatusCode {
case http.StatusNotFound:
// Data stream does not yet exist.
return indexGlobalCheckpoints, nil
simitt marked this conversation as resolved.
Show resolved Hide resolved
}
message, _ := ioutil.ReadAll(resp.Body)
return nil, fmt.Errorf("index stats request failed: %s", message)
}

var stats dataStreamStats
if err := json.NewDecoder(resp.Body).Decode(&stats); err != nil {
return nil, err
}

for index, indexStats := range stats.Indices {
if n := len(indexStats.Shards); n > 1 {
return nil, fmt.Errorf("expected 1 shard, got %d for index %q", n, index)
}
for _, shardStats := range indexStats.Shards {
for _, shardStats := range shardStats {
if shardStats.Routing.Primary {
indexGlobalCheckpoints[index] = shardStats.SeqNo.GlobalCheckpoint
break
}
}
}
}
return indexGlobalCheckpoints, nil
}

type dataStreamStats struct {
Indices map[string]indexStats `json:"indices"`
}

type indexStats struct {
Shards map[string][]shardStats `json:"shards"`
}

type shardStats struct {
Routing struct {
Primary bool `json:"primary"`
} `json:"routing"`
SeqNo struct {
GlobalCheckpoint int64 `json:"global_checkpoint"`
} `json:"seq_no"`
}
3 changes: 2 additions & 1 deletion x-pack/apm-server/sampling/pubsub/datastream.go
Expand Up @@ -87,7 +87,8 @@ const dataStreamIndexTemplate = `{
"data_stream": {},
"template": {
"settings": {
"index.lifecycle.name": %q
"index.lifecycle.name": %q,
"index.number_of_shards": 1
},
"mappings": {
"properties": {
Expand Down