Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.5] Backport add sampling rate to fix distributed tracing #16951

Merged
merged 3 commits into from
Nov 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions server/embed/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,9 @@ type Config struct {
// that exist at the same time.
// Can only be used if ExperimentalEnableDistributedTracing is true.
ExperimentalDistributedTracingServiceInstanceID string `json:"experimental-distributed-tracing-instance-id"`
// ExperimentalDistributedTracingSamplingRatePerMillion is the number of samples to collect per million spans.
// Defaults to 0.
ExperimentalDistributedTracingSamplingRatePerMillion int `json:"experimental-distributed-tracing-sampling-rate"`

// Logger is logger options: currently only supports "zap".
// "capnslog" is removed in v3.5.
Expand Down Expand Up @@ -758,6 +761,13 @@ func (cfg *Config) Validate() error {
return fmt.Errorf("cipher suites cannot be configured when only TLS1.3 is enabled")
}

// Validate distributed tracing configuration but only if enabled.
if cfg.ExperimentalEnableDistributedTracing {
if err := validateTracingConfig(cfg.ExperimentalDistributedTracingSamplingRatePerMillion); err != nil {
return fmt.Errorf("distributed tracing configurition is not valid: (%v)", err)
}
}

return nil
}

Expand Down
72 changes: 59 additions & 13 deletions server/embed/config_tracing.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package embed

import (
"context"
"fmt"

"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
Expand All @@ -26,13 +27,32 @@ import (
"go.uber.org/zap"
)

func setupTracingExporter(ctx context.Context, cfg *Config) (exporter tracesdk.SpanExporter, options []otelgrpc.Option, err error) {
exporter, err = otlptracegrpc.New(ctx,
const maxSamplingRatePerMillion = 1000000

func validateTracingConfig(samplingRate int) error {
if samplingRate < 0 {
return fmt.Errorf("tracing sampling rate must be positive")
}
if samplingRate > maxSamplingRatePerMillion {
return fmt.Errorf("tracing sampling rate must be less than %d", maxSamplingRatePerMillion)
}

return nil
}

type tracingExporter struct {
exporter tracesdk.SpanExporter
opts []otelgrpc.Option
provider *tracesdk.TracerProvider
}

func newTracingExporter(ctx context.Context, cfg *Config) (*tracingExporter, error) {
exporter, err := otlptracegrpc.New(ctx,
otlptracegrpc.WithInsecure(),
otlptracegrpc.WithEndpoint(cfg.ExperimentalDistributedTracingAddress),
)
if err != nil {
return nil, nil, err
return nil, err
}

res, err := resource.New(ctx,
Expand All @@ -41,33 +61,37 @@ func setupTracingExporter(ctx context.Context, cfg *Config) (exporter tracesdk.S
),
)
if err != nil {
return nil, nil, err
return nil, err
}

if resWithIDKey := determineResourceWithIDKey(cfg.ExperimentalDistributedTracingServiceInstanceID); resWithIDKey != nil {
// Merge resources into a new
// resource in case of duplicates.
res, err = resource.Merge(res, resWithIDKey)
if err != nil {
return nil, nil, err
return nil, err
}
}

options = append(options,
traceProvider := tracesdk.NewTracerProvider(
tracesdk.WithBatcher(exporter),
tracesdk.WithResource(res),
tracesdk.WithSampler(
tracesdk.ParentBased(determineSampler(cfg.ExperimentalDistributedTracingSamplingRatePerMillion)),
),
)

options := []otelgrpc.Option{
otelgrpc.WithPropagators(
propagation.NewCompositeTextMapPropagator(
propagation.TraceContext{},
propagation.Baggage{},
),
),
otelgrpc.WithTracerProvider(
tracesdk.NewTracerProvider(
tracesdk.WithBatcher(exporter),
tracesdk.WithResource(res),
tracesdk.WithSampler(tracesdk.ParentBased(tracesdk.NeverSample())),
),
traceProvider,
),
)
}

cfg.logger.Debug(
"distributed tracing enabled",
Expand All @@ -76,7 +100,29 @@ func setupTracingExporter(ctx context.Context, cfg *Config) (exporter tracesdk.S
zap.String("service-instance-id", cfg.ExperimentalDistributedTracingServiceInstanceID),
)

return exporter, options, err
return &tracingExporter{
exporter: exporter,
opts: options,
provider: traceProvider,
}, nil
}

func (te *tracingExporter) Close(ctx context.Context) {
if te.provider != nil {
te.provider.Shutdown(ctx)
}

if te.exporter != nil {
te.exporter.Shutdown(ctx)
}
}

func determineSampler(samplingRate int) tracesdk.Sampler {
sampler := tracesdk.NeverSample()
if samplingRate == 0 {
return sampler
}
return tracesdk.TraceIDRatioBased(float64(samplingRate) / float64(maxSamplingRatePerMillion))
}

// As Tracing service Instance ID must be unique, it should
Expand Down
83 changes: 83 additions & 0 deletions server/embed/config_tracing_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright 2021 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package embed

import (
"testing"
)

const neverSampleDescription = "AlwaysOffSampler"

func TestDetermineSampler(t *testing.T) {
tests := []struct {
name string
sampleRate int
wantSamplerDescription string
}{
{
name: "sample rate is disabled",
sampleRate: 0,
wantSamplerDescription: neverSampleDescription,
},
{
name: "sample rate is 100",
sampleRate: 100,
wantSamplerDescription: "TraceIDRatioBased{0.0001}",
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
sampler := determineSampler(tc.sampleRate)
if tc.wantSamplerDescription != sampler.Description() {
t.Errorf("tracing sampler was not as expected; expected sampler: %#+v, got sampler: %#+v", tc.wantSamplerDescription, sampler.Description())
}
})
}
}

func TestTracingConfig(t *testing.T) {
tests := []struct {
name string
sampleRate int
wantErr bool
}{
{
name: "invalid - sample rate is less than 0",
sampleRate: -1,
wantErr: true,
},
{
name: "invalid - sample rate is more than allowed value",
sampleRate: maxSamplingRatePerMillion + 1,
wantErr: true,
},
{
name: "valid - sample rate is 100",
sampleRate: 100,
wantErr: false,
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
err := validateTracingConfig(tc.sampleRate)
if err == nil && tc.wantErr {
t.Errorf("expected error got (%v) error", err)
}
if err != nil && !tc.wantErr {
t.Errorf("expected no errors, got error: (%v)", err)
}
})
}
}
9 changes: 4 additions & 5 deletions server/embed/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,15 +228,14 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) {

if srvcfg.ExperimentalEnableDistributedTracing {
tctx := context.Background()
tracingExporter, opts, err := setupTracingExporter(tctx, cfg)
tracingExporter, err := newTracingExporter(tctx, cfg)
if err != nil {
return e, err
}
if tracingExporter == nil || len(opts) == 0 {
return e, fmt.Errorf("error setting up distributed tracing")
e.tracingExporterShutdown = func() {
tracingExporter.Close(tctx)
}
e.tracingExporterShutdown = func() { tracingExporter.Shutdown(tctx) }
srvcfg.ExperimentalTracerOptions = opts
srvcfg.ExperimentalTracerOptions = tracingExporter.opts

e.cfg.logger.Info("distributed tracing setup enabled")
}
Expand Down
1 change: 1 addition & 0 deletions server/etcdmain/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ func newConfig() *config {
fs.StringVar(&cfg.ec.ExperimentalDistributedTracingAddress, "experimental-distributed-tracing-address", embed.ExperimentalDistributedTracingAddress, "Address for distributed tracing used for OpenTelemetry Tracing (if enabled with experimental-enable-distributed-tracing flag).")
fs.StringVar(&cfg.ec.ExperimentalDistributedTracingServiceName, "experimental-distributed-tracing-service-name", embed.ExperimentalDistributedTracingServiceName, "Configures service name for distributed tracing to be used to define service name for OpenTelemetry Tracing (if enabled with experimental-enable-distributed-tracing flag). 'etcd' is the default service name. Use the same service name for all instances of etcd.")
fs.StringVar(&cfg.ec.ExperimentalDistributedTracingServiceInstanceID, "experimental-distributed-tracing-instance-id", "", "Configures service instance ID for distributed tracing to be used to define service instance ID key for OpenTelemetry Tracing (if enabled with experimental-enable-distributed-tracing flag). There is no default value set. This ID must be unique per etcd instance.")
fs.IntVar(&cfg.ec.ExperimentalDistributedTracingSamplingRatePerMillion, "experimental-distributed-tracing-sampling-rate", 0, "Number of samples to collect per million spans for OpenTelemetry Tracing (if enabled with experimental-enable-distributed-tracing flag).")

// auth
fs.StringVar(&cfg.ec.AuthToken, "auth-token", cfg.ec.AuthToken, "Specify auth token specific options.")
Expand Down
2 changes: 2 additions & 0 deletions server/etcdmain/help.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,8 @@ Experimental distributed tracing:
Distributed tracing service name, must be same across all etcd instances.
--experimental-distributed-tracing-instance-id ''
Distributed tracing instance ID, must be unique per each etcd instance.
--experimental-distributed-tracing-sampling-rate '0'
Number of samples to collect per million spans for distributed tracing. Disabled by default.

v2 Proxy (to be deprecated in v3.6):
--proxy 'off'
Expand Down
10 changes: 5 additions & 5 deletions tests/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ require (
go.etcd.io/etcd/pkg/v3 v3.5.10
go.etcd.io/etcd/raft/v3 v3.5.10
go.etcd.io/etcd/server/v3 v3.5.10
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.0
go.opentelemetry.io/otel v1.20.0
go.opentelemetry.io/otel/sdk v1.20.0
go.opentelemetry.io/otel/trace v1.20.0
go.opentelemetry.io/proto/otlp v1.0.0
go.uber.org/zap v1.17.0
golang.org/x/crypto v0.14.0
golang.org/x/sync v0.3.0
Expand Down Expand Up @@ -72,14 +77,9 @@ require (
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect
go.etcd.io/bbolt v1.3.8 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.0 // indirect
go.opentelemetry.io/otel v1.20.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.20.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.20.0 // indirect
go.opentelemetry.io/otel/metric v1.20.0 // indirect
go.opentelemetry.io/otel/sdk v1.20.0 // indirect
go.opentelemetry.io/otel/trace v1.20.0 // indirect
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
go.uber.org/atomic v1.7.0 // indirect
go.uber.org/multierr v1.6.0 // indirect
golang.org/x/net v0.17.0 // indirect
Expand Down
Loading
Loading