From 158b70762ebd05aea33cb809bf62775874876888 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Wed, 31 Jan 2024 06:50:13 -0800 Subject: [PATCH] Introduce sampling package as reference implementation for OTEP 235 (#29720) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Description:** This is the `pkg/sampling` portion of of https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/24811. **Link to tracking Issue:** #29738 https://github.com/open-telemetry/opentelemetry-specification/issues/1413 **Testing:** Complete. **Documentation:** New README added. --------- Co-authored-by: Juraci Paixão Kröhling Co-authored-by: Kent Quirk --- .chloggen/add_pkg_sampling.yaml | 27 ++ .github/CODEOWNERS | 1 + .github/ISSUE_TEMPLATE/bug_report.yaml | 1 + .github/ISSUE_TEMPLATE/feature_request.yaml | 1 + .github/ISSUE_TEMPLATE/other.yaml | 1 + pkg/sampling/Makefile | 1 + pkg/sampling/README.md | 23 ++ pkg/sampling/common.go | 125 +++++++ pkg/sampling/doc.go | 89 +++++ pkg/sampling/encoding_test.go | 302 +++++++++++++++++ pkg/sampling/go.mod | 24 ++ pkg/sampling/go.sum | 70 ++++ pkg/sampling/metadata.yaml | 4 + pkg/sampling/oteltracestate.go | 227 +++++++++++++ pkg/sampling/oteltracestate_test.go | 344 ++++++++++++++++++++ pkg/sampling/probability.go | 92 ++++++ pkg/sampling/probability_test.go | 289 ++++++++++++++++ pkg/sampling/randomness.go | 89 +++++ pkg/sampling/randomness_test.go | 38 +++ pkg/sampling/threshold.go | 116 +++++++ pkg/sampling/threshold_test.go | 68 ++++ pkg/sampling/w3ctracestate.go | 177 ++++++++++ pkg/sampling/w3ctracestate_test.go | 161 +++++++++ versions.yaml | 1 + 24 files changed, 2271 insertions(+) create mode 100644 .chloggen/add_pkg_sampling.yaml create mode 100644 pkg/sampling/Makefile create mode 100644 pkg/sampling/README.md create mode 100644 pkg/sampling/common.go create mode 100644 pkg/sampling/doc.go create mode 100644 pkg/sampling/encoding_test.go create mode 100644 pkg/sampling/go.mod create mode 100644 pkg/sampling/go.sum create mode 100644 pkg/sampling/metadata.yaml create mode 100644 pkg/sampling/oteltracestate.go create mode 100644 pkg/sampling/oteltracestate_test.go create mode 100644 pkg/sampling/probability.go create mode 100644 pkg/sampling/probability_test.go create mode 100644 pkg/sampling/randomness.go create mode 100644 pkg/sampling/randomness_test.go create mode 100644 pkg/sampling/threshold.go create mode 100644 pkg/sampling/threshold_test.go create mode 100644 pkg/sampling/w3ctracestate.go create mode 100644 pkg/sampling/w3ctracestate_test.go diff --git a/.chloggen/add_pkg_sampling.yaml b/.chloggen/add_pkg_sampling.yaml new file mode 100644 index 0000000000000..e9b32eaf48400 --- /dev/null +++ b/.chloggen/add_pkg_sampling.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: new_component + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: pkg_sampling + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Package of code for parsing OpenTelemetry tracestate probability sampling fields. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [29738] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [api] diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 2606f24a0a9f4..3bd0227b7e5fa 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -136,6 +136,7 @@ pkg/ottl/ @open-telemetry/collect pkg/pdatatest/ @open-telemetry/collector-contrib-approvers @djaglowski @fatsheep9146 pkg/pdatautil/ @open-telemetry/collector-contrib-approvers @dmitryax pkg/resourcetotelemetry/ @open-telemetry/collector-contrib-approvers @mx-psi +pkg/sampling/ @open-telemetry/collector-contrib-approvers @jmacd @kentquirk pkg/stanza/ @open-telemetry/collector-contrib-approvers @djaglowski pkg/translator/azure/ @open-telemetry/collector-contrib-approvers @open-telemetry/collector-approvers @atoulme @cparkins pkg/translator/jaeger/ @open-telemetry/collector-contrib-approvers @open-telemetry/collector-approvers @frzifus diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml index afd6dcde521c4..4e9ac1cf1b89b 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yaml +++ b/.github/ISSUE_TEMPLATE/bug_report.yaml @@ -133,6 +133,7 @@ body: - pkg/pdatatest - pkg/pdatautil - pkg/resourcetotelemetry + - pkg/sampling - pkg/stanza - pkg/translator/azure - pkg/translator/jaeger diff --git a/.github/ISSUE_TEMPLATE/feature_request.yaml b/.github/ISSUE_TEMPLATE/feature_request.yaml index 4a236f74a82c2..e2120c8364f21 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yaml +++ b/.github/ISSUE_TEMPLATE/feature_request.yaml @@ -127,6 +127,7 @@ body: - pkg/pdatatest - pkg/pdatautil - pkg/resourcetotelemetry + - pkg/sampling - pkg/stanza - pkg/translator/azure - pkg/translator/jaeger diff --git a/.github/ISSUE_TEMPLATE/other.yaml b/.github/ISSUE_TEMPLATE/other.yaml index 6b57ba363f9d8..09c9202dc0319 100644 --- a/.github/ISSUE_TEMPLATE/other.yaml +++ b/.github/ISSUE_TEMPLATE/other.yaml @@ -127,6 +127,7 @@ body: - pkg/pdatatest - pkg/pdatautil - pkg/resourcetotelemetry + - pkg/sampling - pkg/stanza - pkg/translator/azure - pkg/translator/jaeger diff --git a/pkg/sampling/Makefile b/pkg/sampling/Makefile new file mode 100644 index 0000000000000..ded7a36092dc3 --- /dev/null +++ b/pkg/sampling/Makefile @@ -0,0 +1 @@ +include ../../Makefile.Common diff --git a/pkg/sampling/README.md b/pkg/sampling/README.md new file mode 100644 index 0000000000000..eb3a4f8cc1814 --- /dev/null +++ b/pkg/sampling/README.md @@ -0,0 +1,23 @@ +# pkg/sampling + +## Overview + +This package contains utilities for parsing and interpreting the W3C +[TraceState](https://www.w3.org/TR/trace-context/#tracestate-header) +and all sampling-relevant fields specified by OpenTelemetry that may +be found in the OpenTelemetry section of the W3C TraceState. + +This package implements the draft specification in [OTEP +235](https://github.com/open-telemetry/oteps/pull/235), which +specifies two fields used by the OpenTelemetry consistent probability +sampling scheme. + +These are: + +- `th`: the Threshold used to determine whether a TraceID is sampled +- `rv`: an explicit randomness value, which overrides randomness in the TraceID + +[OTEP 235](https://github.com/open-telemetry/oteps/pull/235) contains +details on how to interpret these fields. The are not meant to be +human readable, with a few exceptions. The tracestate entry `ot=th:0` +indicates 100% sampling. diff --git a/pkg/sampling/common.go b/pkg/sampling/common.go new file mode 100644 index 0000000000000..ad94bac763eb5 --- /dev/null +++ b/pkg/sampling/common.go @@ -0,0 +1,125 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" + +import ( + "errors" + "io" + "strings" + + "go.uber.org/multierr" +) + +// KV represents a key-value parsed from a section of the TraceState. +type KV struct { + Key string + Value string +} + +var ( + // ErrTraceStateSize is returned when a TraceState is over its + // size limit, as specified by W3C. + ErrTraceStateSize = errors.New("invalid tracestate size") +) + +// keyValueScanner defines distinct scanner behaviors for lists of +// key-values. +type keyValueScanner struct { + // maxItems is 32 or -1 + maxItems int + // trim is set if OWS (optional whitespace) should be removed + trim bool + // separator is , or ; + separator byte + // equality is = or : + equality byte +} + +// commonTraceState is embedded in both W3C and OTel trace states. +type commonTraceState struct { + kvs []KV +} + +// ExtraValues returns additional values are carried in this +// tracestate object (W3C or OpenTelemetry). +func (cts commonTraceState) ExtraValues() []KV { + return cts.kvs +} + +// trimOws removes optional whitespace on both ends of a string. +// this uses the strict definition for optional whitespace tiven +// in https://www.w3.org/TR/trace-context/#tracestate-header-field-values +func trimOws(input string) string { + return strings.Trim(input, " \t") +} + +// scanKeyValues is common code to scan either W3C or OTel tracestate +// entries, as parameterized in the keyValueScanner struct. +func (s keyValueScanner) scanKeyValues(input string, f func(key, value string) error) error { + var rval error + items := 0 + for input != "" { + items++ + if s.maxItems > 0 && items >= s.maxItems { + // W3C specifies max 32 entries, tested here + // instead of via the regexp. + return ErrTraceStateSize + } + + sep := strings.IndexByte(input, s.separator) + + var member string + if sep < 0 { + member = input + input = "" + } else { + member = input[:sep] + input = input[sep+1:] + } + + if s.trim { + // Trim only required for W3C; OTel does not + // specify whitespace for its value encoding. + member = trimOws(member) + } + + if member == "" { + // W3C allows empty list members. + continue + } + + eq := strings.IndexByte(member, s.equality) + if eq < 0 { + // We expect to find the `s.equality` + // character in this string because we have + // already validated the whole input syntax + // before calling this parser. I.e., this can + // never happen, and if it did, the result + // would be to skip malformed entries. + continue + } + if err := f(member[:eq], member[eq+1:]); err != nil { + rval = multierr.Append(rval, err) + } + } + return rval +} + +// serializer assists with checking and combining errors from +// (io.StringWriter).WriteString(). +type serializer struct { + writer io.StringWriter + err error +} + +// write handles errors from io.StringWriter. +func (ser *serializer) write(str string) { + _, err := ser.writer.WriteString(str) + ser.check(err) +} + +// check handles errors (e.g., from another serializer). +func (ser *serializer) check(err error) { + ser.err = multierr.Append(ser.err, err) +} diff --git a/pkg/sampling/doc.go b/pkg/sampling/doc.go new file mode 100644 index 0000000000000..4b23f1fb85ba6 --- /dev/null +++ b/pkg/sampling/doc.go @@ -0,0 +1,89 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +// # TraceState representation +// +// A [W3CTraceState] object parses and stores the OpenTelemetry +// tracestate field and any other fields that are present in the +// W3C tracestate header, part of the [W3C tracecontext specification]. +// +// An [OpenTelemetryTraceState] object parses and stores fields of +// the OpenTelemetry-specific tracestate field, including those recognized +// for probability sampling and any other fields that are present. The +// syntax of the OpenTelemetry field is specified in [Tracestate handling]. +// +// The probability sampling-specific fields used here are specified in +// [OTEP 235]. The principal named fields are: +// +// - T-value: The sampling rejection threshold, expresses a 56-bit +// hexadecimal number of traces that will be rejected by sampling. +// - R-value: The sampling randomness value can be implicit in a TraceID, +// otherwise it is explicitly encoded as an R-value. +// +// # Low-level types +// +// The three key data types implemented in this package represent sampling +// decisions. +// +// - [Threshold]: Represents an exact sampling probability. +// - [Randomness]: Randomness used for sampling decisions. +// - [Threshold.Probability]: a float64 in the range [MinSamplingProbability, 1.0]. +// +// # Example use-case +// +// To configure a consistent tail sampler in an OpenTelemetry +// Collector using a fixed probability for all traces in an +// "equalizing" arrangement, where the effect of sampling is +// conditioned on how much sampling has already taken place, use the +// following pseudocode. +// +// func Setup() { +// // Get a fixed probability value from the configuration, in +// // the range (0, 1]. +// probability := *FLAG_probability +// +// // Calculate the sampling threshold from probability using 3 +// // hex digits of precision. +// fixedThreshold, err = ProbabilityToThresholdWithPrecision(probability, 3) +// if err != nil { +// // error case: Probability is not valid. +// } +// } +// +// func MakeDecision(tracestate string, tid TraceID) bool { +// // Parse the incoming tracestate +// ts, err := NewW3CTraceState(tracestate) +// if err != nil { +// // error case: Tracestate is ill-formed. +// } +// // For an absolute probability sample, we check the incoming +// // tracestate to see whether it was already sampled enough. +// if len(ts.OTelValue().TValue()) != 0 { +// // If the incoming tracestate was already sampled at +// // least as much as our threshold implies, then its +// // (rejection) threshold is higher. If so, then no +// // further sampling is called for. +// if ThresholdGreater(ts.OTelValue().TValueThreshold(), fixedThreshold) { +// return true +// } +// } +// var rnd Randomness +// // If the R-value is present, use it. If not, rely on TraceID +// // randomness. Note that OTLP v1.1.0 introduces a new Span flag +// // to convey trace randomness correctly, and if the context has +// // neither the randomness bit set or the R-value set, we need a +// // fallback, which can be to synthesize an R-value or to assume +// // the TraceID has sufficient randomness. This detail is left +// // out of scope. +// if rval, hasRval := ts.OTelValue().RValueRandomness(); hasRv { +// rnd = rval +// } else { +// rnd = TraceIDToRandomness(tid) +// } +// +// return fixedThreshold.ShouldSample(rnd) +// } +// +// [W3C tracecontext specification]: https://www.w3.org/TR/trace-context/#tracestate-header +// [Tracestate handling]: https://opentelemetry.io/docs/specs/otel/trace/tracestate-handling/ +package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" diff --git a/pkg/sampling/encoding_test.go b/pkg/sampling/encoding_test.go new file mode 100644 index 0000000000000..2d4bbcd86fabf --- /dev/null +++ b/pkg/sampling/encoding_test.go @@ -0,0 +1,302 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package sampling + +import ( + "encoding/binary" + "errors" + "fmt" + "math/rand" + "strconv" + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pcommon" +) + +// must panics when the error is non-nil and returns the +// generic argument. this does not call require.NoError() in order to +// use a one-line test calling convention, meaning `must(functionCall())` +// ensures there is no error and returns the first argument. +// +// To do the same with NoError() means rewriting the expression with +// a two-line statement: +// +// value, err := functionCall() +// require.NoError(t, value) +func must[T any](t T, err error) T { + if err != nil { + panic(err) + } + return t +} + +// mustNot panics when the error is nil. this does not call +// require.Error() in order to use a one-line test calling +// convention, meaning `mustNot(functionCall())` ensures there is an +// error without requiring a separate variable assignment. +// +// To do the same with Error() means rewriting the expression with +// a two-line statement: +// +// _, err := functionCall() +// require.Error(t, value) +func mustNot[T any](_ T, err error) error { + if err == nil { + return fmt.Errorf("expected an error, got nil") + } + return err +} + +func probabilityToTValue(prob float64) (string, error) { + th, err := ProbabilityToThreshold(prob) + return th.TValue(), err +} + +func tValueToProbability(tv string) (float64, error) { + th, err := TValueToThreshold(tv) + return th.Probability(), err +} + +func TestValidProbabilityToTValue(t *testing.T) { + require.Equal(t, "0", must(probabilityToTValue(1.0))) + require.Equal(t, "8", must(probabilityToTValue(0.5))) + require.Equal(t, "ffffffffffffff", must(probabilityToTValue(0x1p-56))) + require.Equal(t, "aaaaaaaaaaaaac", must(probabilityToTValue(1/3.))) + require.Equal(t, "55555555555558", must(probabilityToTValue(2/3.))) + require.Equal(t, "54", must(probabilityToTValue(1-0x54p-8))) // 0x54p-8 is approximately 1/3 + require.Equal(t, "01", must(probabilityToTValue(1-0x1p-8))) +} + +func TestThresholdGreater(t *testing.T) { + require.True(t, ThresholdGreater( + must(TValueToThreshold("5")), + must(TValueToThreshold("4")), + )) + + require.True(t, ThresholdGreater( + must(TValueToThreshold("4")), + must(TValueToThreshold("04")), + )) + + require.False(t, ThresholdGreater( + must(TValueToThreshold("234")), + must(TValueToThreshold("4")), + )) + + require.True(t, ThresholdGreater( + must(TValueToThreshold("4")), + must(TValueToThreshold("234")), + )) +} + +func TestInvalidprobabilityToTValue(t *testing.T) { + // Too small + require.Error(t, mustNot(probabilityToTValue(0x1p-57))) + require.Error(t, mustNot(probabilityToTValue(0x1p-57))) + + // Too big + require.Error(t, mustNot(probabilityToTValue(1.1))) + require.Error(t, mustNot(probabilityToTValue(1.1))) +} + +func TestTValueToProbability(t *testing.T) { + require.Equal(t, 0.5, must(tValueToProbability("8"))) + require.Equal(t, 1-0x444p-12, must(tValueToProbability("444"))) + require.Equal(t, 1.0, must(tValueToProbability("0"))) + + // 0x55555554p-32 is very close to 1/3 + require.InEpsilon(t, 1-1/3., must(tValueToProbability("55555554")), 1e-9) +} + +func TestProbabilityToThreshold(t *testing.T) { + require.Equal(t, + must(TValueToThreshold("8")), + must(ProbabilityToThreshold(0.5))) + require.Equal(t, + must(TValueToThreshold("ffffffffffffff")), + must(ProbabilityToThreshold(0x1p-56))) + require.Equal(t, + must(TValueToThreshold("ffffffffffff00")), + must(ProbabilityToThreshold(0x100p-56))) + require.Equal(t, + must(TValueToThreshold("00000000000010")), + must(ProbabilityToThreshold(1.0-0x1p-52))) + require.Equal(t, + AlwaysSampleThreshold, + must(ProbabilityToThreshold(1.0))) + + zt, err := ProbabilityToThreshold(0) + require.Equal(t, zt, AlwaysSampleThreshold) + require.Error(t, err) + require.Equal(t, err, ErrProbabilityRange) +} + +func TestShouldSample(t *testing.T) { + // Test four boundary conditions for 50% sampling, + thresh := must(ProbabilityToThreshold(0.5)) + // Smallest TraceID that should NOT sample. + require.False(t, thresh.ShouldSample(TraceIDToRandomness(pcommon.TraceID{ + // 9 meaningless bytes + 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, + 0, // randomness starts here + 0, 0, 0, 0, 0, 0, + }))) + // Largest TraceID that should NOT sample. + require.False(t, thresh.ShouldSample(TraceIDToRandomness(pcommon.TraceID{ + // 9 meaningless bytes + 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, + 0x7f, // randomness starts here + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }))) + // Smallest TraceID that should sample. + require.True(t, thresh.ShouldSample(TraceIDToRandomness(pcommon.TraceID{ + // 9 meaningless bytes + 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, + 0x80, // randomness starts here + 0, 0, 0, 0, 0, 0, + }))) + // Largest TraceID that should sample. + require.True(t, thresh.ShouldSample(TraceIDToRandomness(pcommon.TraceID{ + // 9 meaningless bytes + 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, + 0xff, // randomness starts here + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }))) +} + +func TestRValueSyntax(t *testing.T) { + type testCase struct { + in string + expectErr error + } + for _, test := range []testCase{ + // correct cases + {"12341234123412", nil}, + + // wrong size + {"123412341234120", ErrRValueSize}, + {"1234123412341", ErrRValueSize}, + {"", ErrRValueSize}, + + // bad syntax + {"abcdefgabcdefg", strconv.ErrSyntax}, + } { + t.Run(testName(test.in), func(t *testing.T) { + rnd, err := RValueToRandomness(test.in) + + if test.expectErr != nil { + require.True(t, errors.Is(err, test.expectErr), + "%q: not expecting %v wanted %v", test.in, err, test.expectErr, + ) + require.Equal(t, must(RValueToRandomness("00000000000000")), rnd) + } else { + require.NoError(t, err, "%q", test.in) + + val, err := strconv.ParseUint(test.in, 16, 64) + require.NoError(t, err) + + require.Equal(t, TraceIDToRandomness( + // This explicitly constructs a TraceID from 9 random + // bytes plus the 7 lowest bytes of the input value. + pcommon.TraceID{ + byte(rand.Intn(256)), // 0 + byte(rand.Intn(256)), // 1 + byte(rand.Intn(256)), // 2 + byte(rand.Intn(256)), // 3 + byte(rand.Intn(256)), // 4 + byte(rand.Intn(256)), // 5 + byte(rand.Intn(256)), // 6 + byte(rand.Intn(256)), // 7 + byte(rand.Intn(256)), // 8 + byte(val >> 48 & 0xff), // 9 + byte(val >> 40 & 0xff), // 10 + byte(val >> 32 & 0xff), // 11 + byte(val >> 24 & 0xff), // 12 + byte(val >> 16 & 0xff), // 13 + byte(val >> 8 & 0xff), // 14 + byte(val >> 0 & 0xff), // 15 + }, + ), rnd) + } + }) + } +} + +func TestTValueSyntax(t *testing.T) { + type testCase struct { + in string + expectErr error + } + for _, test := range []testCase{ + // correct cases + {"1", nil}, + + // syntax error + {"", ErrTValueEmpty}, + {"g", strconv.ErrSyntax}, + } { + t.Run(testName(test.in), func(t *testing.T) { + _, err := TValueToThreshold(test.in) + + if test.expectErr != nil { + require.True(t, errors.Is(err, test.expectErr), + "%q: not expecting %v wanted %v", test.in, err, test.expectErr, + ) + } else { + require.NoError(t, err, "%q", test.in) + } + }) + } +} + +// There were two benchmarks used to choose the implementation for the +// Threshold type in this package. The results indicate that it is +// faster to compare a 56-bit number than to compare as 7 element +// []byte. + +type benchTIDs [1024]pcommon.TraceID + +func (tids *benchTIDs) init() { + for i := range tids { + binary.BigEndian.PutUint64(tids[i][:8], rand.Uint64()) + binary.BigEndian.PutUint64(tids[i][8:], rand.Uint64()) + } +} + +// The current implementation, using unsigned: +// +// BenchmarkThresholdCompareAsUint64-10 1000000000 0.4515 ns/op 0 B/op 0 allocs/op +// +// vs the tested and rejected, using bytes: +// +// BenchmarkThresholdCompareAsBytes-10 528679580 2.288 ns/op 0 B/op 0 allocs/op +func BenchmarkThresholdCompareAsUint64(b *testing.B) { + var tids benchTIDs + var comps [1024]Threshold + tids.init() + for i := range comps { + var err error + comps[i], err = ProbabilityToThreshold(rand.Float64()) + if err != nil { + b.Fatal(err) + } + } + + b.ReportAllocs() + b.ResetTimer() + yes := 0 + no := 0 + for i := 0; i < b.N; i++ { + idx := i % len(tids) + tid := tids[idx] + comp := comps[idx] + + if comp.ShouldSample(TraceIDToRandomness(tid)) { + yes++ + } else { + no++ + } + } +} diff --git a/pkg/sampling/go.mod b/pkg/sampling/go.mod new file mode 100644 index 0000000000000..34a2fc3d5df11 --- /dev/null +++ b/pkg/sampling/go.mod @@ -0,0 +1,24 @@ +module github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling + +go 1.20 + +require ( + github.com/stretchr/testify v1.8.4 + go.opentelemetry.io/collector/pdata v1.0.2-0.20240130181942-9c7177496fd5 + go.uber.org/multierr v1.11.0 +) + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.3 // indirect + github.com/kr/text v0.2.0 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + golang.org/x/net v0.18.0 // indirect + golang.org/x/sys v0.14.0 // indirect + golang.org/x/text v0.14.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 // indirect + google.golang.org/grpc v1.61.0 // indirect + google.golang.org/protobuf v1.32.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/pkg/sampling/go.sum b/pkg/sampling/go.sum new file mode 100644 index 0000000000000..422eac3118e9a --- /dev/null +++ b/pkg/sampling/go.sum @@ -0,0 +1,70 @@ +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opentelemetry.io/collector/pdata v1.0.2-0.20240130181942-9c7177496fd5 h1:cMc7sJ29OzK5jZqr6XFzKxiJvlypR/zt2TDhPDqpBic= +go.opentelemetry.io/collector/pdata v1.0.2-0.20240130181942-9c7177496fd5/go.mod h1:IDkDj+B4Fp4wWOclBELN97zcb98HugJ8Q2gA4ZFsN8Q= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.18.0 h1:mIYleuAkSbHh0tCv7RvjL3F6ZVbLjq4+R7zbOn3Kokg= +golang.org/x/net v0.18.0/go.mod h1:/czyP5RqHAH4odGYxBJ1qz0+CE5WZ+2j1YgoEo8F2jQ= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= +golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 h1:Jyp0Hsi0bmHXG6k9eATXoYtjd6e2UzZ1SCn/wIupY14= +google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17/go.mod h1:oQ5rr10WTTMvP4A36n8JpR1OrO1BEiV4f78CneXZxkA= +google.golang.org/grpc v1.61.0 h1:TOvOcuXn30kRao+gfcvsebNEa5iZIiLkisYEkf7R7o0= +google.golang.org/grpc v1.61.0/go.mod h1:VUbo7IFqmF1QtCAstipjG0GIoq49KvMe9+h1jFLBNJs= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= +google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/pkg/sampling/metadata.yaml b/pkg/sampling/metadata.yaml new file mode 100644 index 0000000000000..8fe70cf1d9ecf --- /dev/null +++ b/pkg/sampling/metadata.yaml @@ -0,0 +1,4 @@ +status: + class: pkg + codeowners: + active: [kentquirk, jmacd] diff --git a/pkg/sampling/oteltracestate.go b/pkg/sampling/oteltracestate.go new file mode 100644 index 0000000000000..22babccda6276 --- /dev/null +++ b/pkg/sampling/oteltracestate.go @@ -0,0 +1,227 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" + +import ( + "errors" + "io" + "regexp" + "strconv" +) + +// OpenTelemetryTraceState represents the `ot` section of the W3C tracestate +// which is specified generically in https://opentelemetry.io/docs/specs/otel/trace/tracestate-handling/. +// +// OpenTelemetry defines two specific values that convey sampling +// probability, known as T-Value (with "th", for threshold), R-Value +// (with key "rv", for random value), and extra values. +type OpenTelemetryTraceState struct { + commonTraceState + + // sampling r and t-values + rnd Randomness // r value parsed, as unsigned + rvalue string // 14 ASCII hex digits + threshold Threshold // t value parsed, as a threshold + tvalue string // 1-14 ASCII hex digits +} + +const ( + // rValueFieldName is the OTel tracestate field for R-value + rValueFieldName = "rv" + // tValueFieldName is the OTel tracestate field for T-value + tValueFieldName = "th" + + // hardMaxOTelLength is the maximum encoded size of an OTel + // tracestate value. + hardMaxOTelLength = 256 + + // chr = ucalpha / lcalpha / DIGIT / "." / "_" / "-" + // ucalpha = %x41-5A ; A-Z + // lcalpha = %x61-7A ; a-z + // key = lcalpha *(lcalpha / DIGIT ) + // value = *(chr) + // list-member = key ":" value + // list = list-member *( ";" list-member ) + otelKeyRegexp = lcAlphaRegexp + lcAlphanumRegexp + `*` + otelValueRegexp = `[a-zA-Z0-9._\-]*` + otelMemberRegexp = `(?:` + otelKeyRegexp + `:` + otelValueRegexp + `)` + otelSemicolonMemberRegexp = `(?:` + `;` + otelMemberRegexp + `)` + otelTracestateRegexp = `^` + otelMemberRegexp + otelSemicolonMemberRegexp + `*$` +) + +var ( + otelTracestateRe = regexp.MustCompile(otelTracestateRegexp) + + otelSyntax = keyValueScanner{ + maxItems: -1, + trim: false, + separator: ';', + equality: ':', + } + + // ErrInconsistentSampling is returned when a sampler update + // is illogical, indicating that the tracestate was not + // modified. Preferably, Samplers will avoid seeing this + // error by using a ThresholdGreater() test, which allows them + // to report a more clear error to the user. For example, if + // data arrives sampled at 1/100 and an equalizing sampler is + // configured for 1/2 sampling, the Sampler may detect the + // illogical condition itself using ThresholdGreater and skip + // the call to UpdateTValueWithSampling, which will have no + // effect and return this error. How a sampler decides to + // handle this condition is up to the sampler: for example the + // equalizing sampler can decide to pass through a span + // indicating 1/100 sampling or it can reject the span. + ErrInconsistentSampling = errors.New("cannot raise existing sampling probability") +) + +// NewOpenTelemetryTraceState returns a parsed representation of the +// OpenTelemetry tracestate section. Errors indicate an invalid +// tracestate was received. +func NewOpenTelemetryTraceState(input string) (OpenTelemetryTraceState, error) { + otts := OpenTelemetryTraceState{} + + if len(input) > hardMaxOTelLength { + return otts, ErrTraceStateSize + } + + if !otelTracestateRe.MatchString(input) { + return otts, strconv.ErrSyntax + } + + err := otelSyntax.scanKeyValues(input, func(key, value string) error { + var err error + switch key { + case rValueFieldName: + if otts.rnd, err = RValueToRandomness(value); err == nil { + otts.rvalue = value + } else { + // The zero-value for randomness implies always-sample; + // the threshold test is R < T, but T is not meaningful + // at zero, and this value implies zero adjusted count. + otts.rvalue = "" + otts.rnd = Randomness{} + } + case tValueFieldName: + if otts.threshold, err = TValueToThreshold(value); err == nil { + otts.tvalue = value + } else { + otts.tvalue = "" + otts.threshold = AlwaysSampleThreshold + } + default: + otts.kvs = append(otts.kvs, KV{ + Key: key, + Value: value, + }) + } + return err + }) + + return otts, err +} + +// RValue returns the R-value (key: "rv") as a string or empty if +// there is no R-value set. +func (otts *OpenTelemetryTraceState) RValue() string { + return otts.rvalue +} + +// RValueRandomness returns the randomness object corresponding with +// RValue() and a boolean indicating whether the R-value is set. +func (otts *OpenTelemetryTraceState) RValueRandomness() (Randomness, bool) { + return otts.rnd, len(otts.rvalue) != 0 +} + +// TValue returns the T-value (key: "th") as a string or empty if +// there is no T-value set. +func (otts *OpenTelemetryTraceState) TValue() string { + return otts.tvalue +} + +// TValueThreshold returns the threshold object corresponding with +// TValue() and a boolean (equal to len(TValue()) != 0 indicating +// whether the T-value is valid. +func (otts *OpenTelemetryTraceState) TValueThreshold() (Threshold, bool) { + return otts.threshold, len(otts.tvalue) != 0 +} + +// UpdateTValueWithSampling modifies the TValue of this object, which +// changes its adjusted count. If the change of TValue leads to +// inconsistency (i.e., raising sampling probability), an error is +// returned. +func (otts *OpenTelemetryTraceState) UpdateTValueWithSampling(sampledThreshold Threshold, encodedTValue string) error { + if len(otts.TValue()) != 0 && ThresholdGreater(otts.threshold, sampledThreshold) { + return ErrInconsistentSampling + } + otts.threshold = sampledThreshold + otts.tvalue = encodedTValue + return nil +} + +// AdjustedCount returns the adjusted count implied by this TValue. +// This term is defined here: +// https://opentelemetry.io/docs/specs/otel/trace/tracestate-probability-sampling/ +func (otts *OpenTelemetryTraceState) AdjustedCount() float64 { + if len(otts.TValue()) == 0 { + return 0 + } + return 1.0 / otts.threshold.Probability() +} + +// ClearTValue is used to unset TValue, in cases where it is +// inconsistent on arrival. +func (otts *OpenTelemetryTraceState) ClearTValue() { + otts.tvalue = "" + otts.threshold = Threshold{} +} + +// SetRValue establishes explicit randomness for this TraceState. +func (otts *OpenTelemetryTraceState) SetRValue(randomness Randomness) { + otts.rnd = randomness + otts.rvalue = randomness.RValue() +} + +// ClearRValue unsets explicit randomness. +func (otts *OpenTelemetryTraceState) ClearRValue() { + otts.rvalue = "" + otts.rnd = Randomness{} +} + +// HasAnyValue returns true if there are any fields in this +// tracestate, including any extra values. +func (otts *OpenTelemetryTraceState) HasAnyValue() bool { + return len(otts.RValue()) != 0 || len(otts.TValue()) != 0 || len(otts.ExtraValues()) != 0 +} + +// Serialize encodes this TraceState object. +func (otts *OpenTelemetryTraceState) Serialize(w io.StringWriter) error { + ser := serializer{writer: w} + cnt := 0 + sep := func() { + if cnt != 0 { + ser.write(";") + } + cnt++ + } + if len(otts.RValue()) != 0 { + sep() + ser.write(rValueFieldName) + ser.write(":") + ser.write(otts.RValue()) + } + if len(otts.TValue()) != 0 { + sep() + ser.write(tValueFieldName) + ser.write(":") + ser.write(otts.TValue()) + } + for _, kv := range otts.ExtraValues() { + sep() + ser.write(kv.Key) + ser.write(":") + ser.write(kv.Value) + } + return ser.err +} diff --git a/pkg/sampling/oteltracestate_test.go b/pkg/sampling/oteltracestate_test.go new file mode 100644 index 0000000000000..b14c05690b535 --- /dev/null +++ b/pkg/sampling/oteltracestate_test.go @@ -0,0 +1,344 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package sampling + +import ( + "errors" + "fmt" + "strconv" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +// ExampleOpenTelemetryTraceState_AdjustedCount shows how to access +// the adjusted count for a sampled context when it has non-zero +// probability. +func ExampleOpenTelemetryTraceState_AdjustedCount() { + w3c, err := NewW3CTraceState("ot=th:c") + if err != nil { + panic(err) + } + ot := w3c.OTelValue() + + fmt.Printf("Adjusted count for T-value %q: %f", ot.TValue(), ot.AdjustedCount()) + + // Output: + // Adjusted count for T-value "c": 4.000000 +} + +func testName(in string) string { + if len(in) > 32 { + return in[:32] + "..." + } + return in +} + +func TestEmptyOpenTelemetryTraceState(t *testing.T) { + // Empty value is invalid + _, err := NewOpenTelemetryTraceState("") + require.Error(t, err) +} + +func TestOpenTelemetryTraceStateTValueSerialize(t *testing.T) { + const orig = "rv:10000000000000;th:3;a:b;c:d" + otts, err := NewOpenTelemetryTraceState(orig) + require.NoError(t, err) + require.Equal(t, "3", otts.TValue()) + tv, hasTv := otts.TValueThreshold() + require.True(t, hasTv) + require.Equal(t, 1-0x3p-4, tv.Probability()) + + require.NotEqual(t, "", otts.RValue()) + require.Equal(t, "10000000000000", otts.RValue()) + rv, hasRv := otts.RValueRandomness() + require.True(t, hasRv) + require.Equal(t, "10000000000000", rv.RValue()) + + require.True(t, otts.HasAnyValue()) + var w strings.Builder + require.NoError(t, otts.Serialize(&w)) + require.Equal(t, orig, w.String()) +} + +func TestOpenTelemetryTraceStateZero(t *testing.T) { + const orig = "th:0" + otts, err := NewOpenTelemetryTraceState(orig) + require.NoError(t, err) + require.True(t, otts.HasAnyValue()) + require.Equal(t, "0", otts.TValue()) + tv, hasTv := otts.TValueThreshold() + require.True(t, hasTv) + require.Equal(t, 1.0, tv.Probability()) + + var w strings.Builder + require.NoError(t, otts.Serialize(&w)) + require.Equal(t, orig, w.String()) +} + +func TestOpenTelemetryTraceStateRValuePValue(t *testing.T) { + // Ensures the caller can handle RValueSizeError and search + // for p-value in extra-values. + const orig = "rv:3;p:2" + otts, err := NewOpenTelemetryTraceState(orig) + require.Error(t, err) + require.Equal(t, ErrRValueSize, err) + require.Equal(t, "", otts.RValue()) + + // The error is oblivious to the old r-value, but that's ok. + require.Contains(t, err.Error(), "14 hex digits") + + require.Equal(t, []KV{{"p", "2"}}, otts.ExtraValues()) + + var w strings.Builder + require.NoError(t, otts.Serialize(&w)) + require.Equal(t, "p:2", w.String()) +} + +func TestOpenTelemetryTraceStateTValueUpdate(t *testing.T) { + const orig = "rv:abcdefabcdefab" + otts, err := NewOpenTelemetryTraceState(orig) + require.NoError(t, err) + require.Equal(t, "", otts.TValue()) + require.NotEqual(t, "", otts.RValue()) + + th, _ := TValueToThreshold("3") + require.NoError(t, otts.UpdateTValueWithSampling(th, "3")) + + require.Equal(t, "3", otts.TValue()) + tv, hasTv := otts.TValueThreshold() + require.True(t, hasTv) + require.Equal(t, 1-0x3p-4, tv.Probability()) + + const updated = "rv:abcdefabcdefab;th:3" + var w strings.Builder + require.NoError(t, otts.Serialize(&w)) + require.Equal(t, updated, w.String()) +} + +func TestOpenTelemetryTraceStateRTUpdate(t *testing.T) { + otts, err := NewOpenTelemetryTraceState("a:b") + require.NoError(t, err) + require.Equal(t, "", otts.TValue()) + require.Equal(t, "", otts.RValue()) + require.True(t, otts.HasAnyValue()) + + th, _ := TValueToThreshold("3") + require.NoError(t, otts.UpdateTValueWithSampling(th, "3")) + otts.SetRValue(must(RValueToRandomness("00000000000003"))) + + const updated = "rv:00000000000003;th:3;a:b" + var w strings.Builder + require.NoError(t, otts.Serialize(&w)) + require.Equal(t, updated, w.String()) +} + +func TestOpenTelemetryTraceStateRTClear(t *testing.T) { + otts, err := NewOpenTelemetryTraceState("a:b;rv:12341234123412;th:1234") + require.NoError(t, err) + + otts.ClearTValue() + otts.ClearRValue() + + const updated = "a:b" + var w strings.Builder + require.NoError(t, otts.Serialize(&w)) + require.Equal(t, updated, w.String()) +} + +func TestParseOpenTelemetryTraceState(t *testing.T) { + type testCase struct { + in string + rval string + tval string + extra []string + expectErr error + } + const ns = "" + for _, test := range []testCase{ + // t-value correct cases + {"th:2", ns, "2", nil, nil}, + {"th:ab", ns, "ab", nil, nil}, + {"th:abcdefabcdefab", ns, "abcdefabcdefab", nil, nil}, + + // correct with trailing zeros. the parser does not re-format + // to remove trailing zeros. + {"th:1000", ns, "1000", nil, nil}, + + // syntax errors + {"", ns, ns, nil, strconv.ErrSyntax}, + {"th:1;", ns, ns, nil, strconv.ErrSyntax}, + {"th:1=p:2", ns, ns, nil, strconv.ErrSyntax}, + {"th:1;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, + {":1;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, + {":;p:2=s:3", ns, ns, nil, strconv.ErrSyntax}, + {":;:", ns, ns, nil, strconv.ErrSyntax}, + {":", ns, ns, nil, strconv.ErrSyntax}, + {"th:;p=1", ns, ns, nil, strconv.ErrSyntax}, + {"th:$", ns, ns, nil, strconv.ErrSyntax}, // not-hexadecimal + {"th:0x1p+3", ns, ns, nil, strconv.ErrSyntax}, // + is invalid + {"th:14.5", ns, ns, nil, strconv.ErrSyntax}, // integer syntax + {"th:-1", ns, ns, nil, strconv.ErrSyntax}, // non-negative + + // too many digits + {"th:ffffffffffffffff", ns, ns, nil, ErrTValueSize}, + {"th:100000000000000", ns, ns, nil, ErrTValueSize}, + + // one field + {"e100:1", ns, ns, []string{"e100:1"}, nil}, + + // two fields + {"e1:1;e2:2", ns, ns, []string{"e1:1", "e2:2"}, nil}, + + // one extra key, two ways + {"th:2;extra:stuff", ns, "2", []string{"extra:stuff"}, nil}, + {"extra:stuff;th:2", ns, "2", []string{"extra:stuff"}, nil}, + + // two extra fields + {"e100:100;th:1;e101:101", ns, "1", []string{"e100:100", "e101:101"}, nil}, + {"th:1;e100:100;e101:101", ns, "1", []string{"e100:100", "e101:101"}, nil}, + {"e100:100;e101:101;th:1", ns, "1", []string{"e100:100", "e101:101"}, nil}, + + // parse error prevents capturing unrecognized keys + {"1:1;u:V", ns, ns, nil, strconv.ErrSyntax}, + {"X:1;u:V", ns, ns, nil, strconv.ErrSyntax}, + {"x:1;u:V", ns, ns, []string{"x:1", "u:V"}, nil}, + + // r-value + {"rv:22222222222222;extra:stuff", "22222222222222", ns, []string{"extra:stuff"}, nil}, + {"extra:stuff;rv:22222222222222", "22222222222222", ns, []string{"extra:stuff"}, nil}, + {"rv:ffffffffffffff", "ffffffffffffff", ns, nil, nil}, + + // r-value range error (15 bytes of hex or more) + {"rv:100000000000000", ns, ns, nil, ErrRValueSize}, + {"rv:fffffffffffffffff", ns, ns, nil, ErrRValueSize}, + + // no trailing ; + {"x:1;", ns, ns, nil, strconv.ErrSyntax}, + + // empty key + {"x:", ns, ns, []string{"x:"}, nil}, + + // charset test + {"x:0X1FFF;y:.-_-.;z:", ns, ns, []string{"x:0X1FFF", "y:.-_-.", "z:"}, nil}, + {"x1y2z3:1-2-3;y1:y_1;xy:-;th:50", ns, "50", []string{"x1y2z3:1-2-3", "y1:y_1", "xy:-"}, nil}, + + // size exceeded + {"x:" + strings.Repeat("_", 255), ns, ns, nil, ErrTraceStateSize}, + {"x:" + strings.Repeat("_", 254), ns, ns, []string{"x:" + strings.Repeat("_", 254)}, nil}, + } { + t.Run(testName(test.in), func(t *testing.T) { + otts, err := NewOpenTelemetryTraceState(test.in) + + if test.expectErr != nil { + require.True(t, errors.Is(err, test.expectErr), "%q: not expecting %v wanted %v", test.in, err, test.expectErr) + } else { + require.NoError(t, err) + } + require.Equal(t, test.rval, otts.RValue()) + require.Equal(t, test.tval, otts.TValue()) + var expect []KV + for _, ex := range test.extra { + k, v, _ := strings.Cut(ex, ":") + expect = append(expect, KV{ + Key: k, + Value: v, + }) + } + require.Equal(t, expect, otts.ExtraValues()) + + if test.expectErr != nil { + return + } + // on success Serialize() should not modify + // test by re-parsing + var w strings.Builder + require.NoError(t, otts.Serialize(&w)) + cpy, err := NewOpenTelemetryTraceState(w.String()) + require.NoError(t, err) + require.Equal(t, otts, cpy) + }) + } +} + +func TestUpdateTValueWithSampling(t *testing.T) { + type testCase struct { + // The input otel tracestate; no error conditions tested + in string + + // The incoming adjusted count; defined whether + // t-value is present or not. + adjCountIn float64 + + // the update probability; threshold and tvalue are + // derived from this + prob float64 + + // when update error is expected + updateErr error + + // output t-value + out string + + // output adjusted count + adjCountOut float64 + } + for _, test := range []testCase{ + // 8/16 in, sampled at 2/16 (smaller prob) => adjCount 8 + {"th:8", 2, 0x2p-4, nil, "th:e", 8}, + + // 8/16 in, sampled at 14/16 (larger prob) => error, adjCount 2 + {"th:8", 2, 0xep-4, ErrInconsistentSampling, "th:8", 2}, + + // 1/16 in, 50% update (larger prob) => (error) + {"th:f", 16, 0x8p-4, ErrInconsistentSampling, "th:f", 16}, + + // 1/1 sampling in, 1/16 update + {"th:0", 1, 0x1p-4, nil, "th:f", 16}, + + // no t-value in, 1/16 update + {"", 0, 0x1p-4, nil, "th:f", 16}, + + // none in, 100% update + {"", 0, 1, nil, "th:0", 1}, + + // 1/2 in, 100% update (error) + {"th:8", 2, 1, ErrInconsistentSampling, "th:8", 2}, + + // 1/1 in, 0x1p-56 update + {"th:0", 1, 0x1p-56, nil, "th:ffffffffffffff", 0x1p56}, + + // 1/1 in, 0x1p-56 update + {"th:0", 1, 0x1p-56, nil, "th:ffffffffffffff", 0x1p56}, + + // 2/3 in, 1/3 update. Note that 0x555 + 0xaab = 0x1000. + {"th:555", 1 / (1 - 0x555p-12), 0x555p-12, nil, "th:aab", 1 / (1 - 0xaabp-12)}, + } { + t.Run(test.in+"/"+test.out, func(t *testing.T) { + otts := OpenTelemetryTraceState{} + if test.in != "" { + var err error + otts, err = NewOpenTelemetryTraceState(test.in) + require.NoError(t, err) + } + + require.Equal(t, test.adjCountIn, otts.AdjustedCount()) + + newTh, err := ProbabilityToThreshold(test.prob) + require.NoError(t, err) + + upErr := otts.UpdateTValueWithSampling(newTh, newTh.TValue()) + + require.Equal(t, test.updateErr, upErr) + + var outData strings.Builder + err = otts.Serialize(&outData) + require.NoError(t, err) + require.Equal(t, test.out, outData.String()) + + require.Equal(t, test.adjCountOut, otts.AdjustedCount()) + }) + } +} diff --git a/pkg/sampling/probability.go b/pkg/sampling/probability.go new file mode 100644 index 0000000000000..4b3498b8596bc --- /dev/null +++ b/pkg/sampling/probability.go @@ -0,0 +1,92 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" + +import ( + "errors" + "math" +) + +// ErrProbabilityRange is returned when a value should be in the range [1/MaxAdjustedCount, 1]. +var ErrProbabilityRange = errors.New("sampling probability out of the range [1/MaxAdjustedCount, 1]") + +// ErrPrecisionUnderflow is returned when a precision is too great for the range. +var ErrPrecisionUnderflow = errors.New("sampling precision is too great for the range") + +// MinSamplingProbability is the smallest representable probability +// and is the inverse of MaxAdjustedCount. +const MinSamplingProbability = 1.0 / MaxAdjustedCount + +// probabilityInRange tests MinSamplingProb <= prob <= 1. +func probabilityInRange(prob float64) bool { + return prob >= MinSamplingProbability && prob <= 1 +} + +// ProbabilityToThreshold converts a probability to a Threshold. It +// returns an error when the probability is out-of-range. +func ProbabilityToThreshold(prob float64) (Threshold, error) { + // Probability cases + if !probabilityInRange(prob) { + return AlwaysSampleThreshold, ErrProbabilityRange + } + + scaled := uint64(math.Round(prob * MaxAdjustedCount)) + + return Threshold{ + unsigned: MaxAdjustedCount - scaled, + }, nil +} + +// ProbabilityToThresholdWithPrecision is like ProbabilityToThreshold +// with support for reduced precision. The `prec` argument determines +// how many significant hex digits will be used to encode the exact +// probability. +func ProbabilityToThresholdWithPrecision(prob float64, prec uint8) (Threshold, error) { + // Assume full precision at 0. + if prec == 0 { + return ProbabilityToThreshold(prob) + } + if !probabilityInRange(prob) { + return AlwaysSampleThreshold, ErrProbabilityRange + } + // Special case for prob == 1. The logic for revising precision + // that follows requires 0 < 1 - prob < 1. + if prob == 1 { + return AlwaysSampleThreshold, nil + } + + // Adjust precision considering the significance of leading + // zeros. If we can multiply the rejection probability by 16 + // and still be less than 1, then there is a leading zero of + // obligatory precision. + for reject := 1 - prob; reject*16 < 1; { + reject *= 16 + prec++ + } + + // Check if leading zeros plus precision is above the maximum. + // This is called underflow because the requested precision + // leads to complete no significant figures. + if prec > NumHexDigits { + return AlwaysSampleThreshold, ErrPrecisionUnderflow + } + + scaled := uint64(math.Round(prob * MaxAdjustedCount)) + rscaled := MaxAdjustedCount - scaled + shift := 4 * (14 - prec) + half := uint64(1) << (shift - 1) + + rscaled += half + rscaled >>= shift + rscaled <<= shift + + return Threshold{ + unsigned: rscaled, + }, nil +} + +// Probability is the sampling ratio in the range [MinSamplingProb, 1]. +func (t Threshold) Probability() float64 { + return float64(MaxAdjustedCount-t.unsigned) / MaxAdjustedCount +} diff --git a/pkg/sampling/probability_test.go b/pkg/sampling/probability_test.go new file mode 100644 index 0000000000000..33b38d9deec70 --- /dev/null +++ b/pkg/sampling/probability_test.go @@ -0,0 +1,289 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" + +import ( + "fmt" + "math" + "testing" + + "github.com/stretchr/testify/require" +) + +// ExampleProbabilityToThresholdWithPrecision demonstrates how 1/3, +// 2/3, and 3/3 are encoded with precision 3. When working with +// arbitrary floating point values, it is recommended to use an +// explicit precision parameter so that T-values are both reasonably +// compact and accurate. +func ExampleProbabilityToThresholdWithPrecision() { + const divisor = 3.0 + const precision = 3 + + for dividend := 1.0; dividend <= divisor; dividend++ { + tval, _ := ProbabilityToThresholdWithPrecision(dividend/divisor, precision) + fmt.Println(tval.TValue()) + } + + // Output: + // aab + // 555 + // 0 +} + +// ExampleProbabilityToThreshold_rounding demonstrates that with full +// precision, the resulting t-value appears to round in an unexpected +// way. +func ExampleProbabilityToThreshold_rounding() { + // 1/3 sampling corresponds with a rejection threshold of (1 - 1/3). + const exampleProb = 1.0 / 3.0 + + // 1/3 in decimal is the repeating fraction of 6 (0.333333), while in + // hexadecimal it is the repeating fraction of a (0x0.555555). + tval, _ := ProbabilityToThreshold(exampleProb) + + // Note the trailing hex "c" below, which does not match + // intuition for a repeating pattern of hex "a" digits. Why + // is the final digit not hex "b"? The reason it is hex "c" + // is that ProbabilityToThreshold computes the number of spans + // selected as a 56-bit integer using a 52-bit significand. + // Because the fraction uses fewer bits than the threshold, + // the last digit rounds down, with 0x55555555555554 spans + // rejected out of 0x100000000000000. The subtraction of 0x4 + // from 0x10 leads to a trailing hex "c". + fmt.Println(tval.TValue()) + + // Output: + // aaaaaaaaaaaaac +} + +// ExampleProbabilityToThreshold_limitedprecision demonstrates the +// gap between Threshold values and probability values is not equal, +// clarifying which conversions are lossy. +func ExampleProbabilityToThreshold_limitedprecision() { + next := func(x float64, n int) float64 { + for ; n < 0; n++ { + x = math.Nextafter(x, 0) + } + return x + } + + // At probability 50% or above, only 52 bits of precision are + // available for floating point representation. + // + // In the range 1/2 to 1: 52 bits of precision are available; 4 trailing zero bits; + // In the range 1/4 to 1/2: 52 bits of precision are available; 3 trailing zero bits; + // In the range 1/8 to 1/4: 52 bits of precision are available; 2 trailing zero bits; + // In the range 1/16 to 1/8: 52 bits of precision are available; 1 trailing zero bits; + // Probabilties less than 1/16: 51 bits of precision are available + // Probabilties less than 1/32: 50 bits of precision are available. + // ... + // Probabilties less than 0x1p-N: 55-N bits of precision are available. + // ... + // Probabilities less than 0x1p-55: 0 bits of precision. + const large = 15.0 / 16 + const half = 8.0 / 16 + const quarter = 4.0 / 16 + const eighth = 2.0 / 16 + const small = 1.0 / 16 + for _, prob := range []float64{ + // Values from 1/2 to 15/16: last T-value digit always "8". + next(large, 0), + next(large, -1), + next(large, -2), + next(large, -3), + 0, + // Values from 1/4 to 1/2: last T-value digit always + // "4", "8", or "c". + next(half, 0), + next(half, -1), + next(half, -2), + next(half, -3), + 0, + // Values from 1/8 to 1/4, last T-value digit can be any + // even hex digit. + next(quarter, 0), + next(quarter, -1), + next(quarter, -2), + next(quarter, -3), + 0, + // Values from 1/16 to 1/8: Every adjacent probability + // value maps to an exact Threshold. + next(eighth, 0), + next(eighth, -1), + next(eighth, -2), + next(eighth, -3), + 0, + // Values less than 1/16 demonstrate lossy behavior. + // Here probability values can express more values + // than Thresholds can, so multiple probability values + // map to the same Threshold. Here, 1/16 and the next + // descending floating point value both map to T-value + // "f". + next(small, 0), + next(small, -1), + next(small, -2), + next(small, -3), + } { + if prob == 0 { + fmt.Println("--") + continue + } + tval, _ := ProbabilityToThreshold(prob) + fmt.Println(tval.TValue()) + } + + // Output: + // 1 + // 10000000000008 + // 1000000000001 + // 10000000000018 + // -- + // 8 + // 80000000000004 + // 80000000000008 + // 8000000000000c + // -- + // c + // c0000000000002 + // c0000000000004 + // c0000000000006 + // -- + // e + // e0000000000001 + // e0000000000002 + // e0000000000003 + // -- + // f + // f + // f0000000000001 + // f0000000000001 +} + +// ExampleProbabilityToThreshold_verysmall shows the smallest +// expressible sampling probability values. +func ExampleProbabilityToThreshold_verysmall() { + for _, prob := range []float64{ + MinSamplingProbability, // Skip 1 out of 2**56 + 0x2p-56, // Skip 2 out of 2**56 + 0x3p-56, // Skip 3 out of 2**56 + 0x4p-56, // Skip 4 out of 2**56 + 0x8p-56, // Skip 8 out of 2**56 + 0x10p-56, // Skip 0x10 out of 2**56 + } { + tval, _ := ProbabilityToThreshold(prob) + fmt.Println(tval.TValue()) + } + + // Output: + // ffffffffffffff + // fffffffffffffe + // fffffffffffffd + // fffffffffffffc + // fffffffffffff8 + // fffffffffffff +} + +func TestProbabilityToThresholdWithPrecision(t *testing.T) { + type kase struct { + prob float64 + exact string + rounded []string + } + + for _, test := range []kase{ + // Note: remember 8 is half of 16: hex rounds up at 8+, down at 7-. + { + 1 - 0x456789ap-28, + "456789a", + []string{ + "45678a", + "45679", + "4568", + "456", + "45", + "4", + }, + }, + // Add 3 leading zeros + { + 1 - 0x456789ap-40, + "000456789a", + []string{ + "00045678a", + "00045679", + "0004568", + "000456", + "00045", + "0004", + }, + }, + // Rounding up + { + 1 - 0x789abcdefp-40, + "0789abcdef", + []string{ + "0789abcdef", + "0789abcdf", + "0789abce", + "0789abd", + "0789ac", + "0789b", + "078a", + "079", + "08", + }, + }, + // Rounding down + { + 1 - 0x12345678p-32, + "12345678", + []string{ + "1234568", + "123456", + "12345", + "1234", + "123", + "12", + "1", + }, + }, + // Zeros + { + 1 - 0x80801p-28, + "0080801", + []string{ + "00808", + "008", + }, + }, + // 100% sampling + { + 1, + "0", + []string{ + "0", + }, + }, + } { + t.Run(test.exact, func(t *testing.T) { + th, err := ProbabilityToThreshold(test.prob) + require.NoError(t, err) + require.Equal(t, th.TValue(), test.exact) + + for _, round := range test.rounded { + t.Run(round, func(t *testing.T) { + // Requested precision is independent of leading zeros, + // so strip them to calculate test precision. + strip := round + for len(strip) > 0 && strip[0] == '0' { + strip = strip[1:] + } + rth, err := ProbabilityToThresholdWithPrecision(test.prob, uint8(len(strip))) + require.NoError(t, err) + require.Equal(t, round, rth.TValue()) + }) + } + }) + } +} diff --git a/pkg/sampling/randomness.go b/pkg/sampling/randomness.go new file mode 100644 index 0000000000000..1cda0da8cc878 --- /dev/null +++ b/pkg/sampling/randomness.go @@ -0,0 +1,89 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" + +import ( + "encoding/binary" + "errors" + "strconv" + + "go.opentelemetry.io/collector/pdata/pcommon" +) + +// numRandomnessValues equals MaxAdjustedCount--this variable has been +// introduced to improve readability. Recall that MaxAdjustedCount is +// 2**56 which is one greater than the maximum RValue +// ("ffffffffffffff", i.e., "100000000000000"). +const numRandomnessValues = MaxAdjustedCount + +// ErrRValueSize is returned by RValueToRandomess in case of +// unexpected size. +var ErrRValueSize = errors.New("r-value must have 14 hex digits") + +// leastHalfTraceIDThresholdMask is the mask to use on the +// least-significant half of the TraceID, i.e., bytes 8-15. +// Because this is a 56 bit mask, the result after masking is +// the unsigned value of bytes 9 through 15. +// +// This helps extract 56 bits of randomness from the second half of +// the TraceID, as specified in https://www.w3.org/TR/trace-context-2/#randomness-of-trace-id +const leastHalfTraceIDThresholdMask = MaxAdjustedCount - 1 + +// Randomness may be derived from R-value or TraceID. +// +// Randomness contains 56 bits of randomness, derived in one of two ways, see: +// https://www.w3.org/TR/trace-context-2/#randomness-of-trace-id +type Randomness struct { + // unsigned is in the range [0, MaxAdjustedCount-1] + unsigned uint64 +} + +// TraceIDToRandomness returns randomness from a TraceID (assumes +// the traceparent random flag was set). +func TraceIDToRandomness(id pcommon.TraceID) Randomness { + // To get the 56 bits we want, take the second half of the trace ID, + leastHalf := binary.BigEndian.Uint64(id[8:]) + return Randomness{ + // Then apply the mask to get the least-significant 56 bits / 7 bytes. + // Equivalently stated: zero the most-significant 8 bits. + unsigned: leastHalf & leastHalfTraceIDThresholdMask, + } +} + +// RValueToRandomness parses NumHexDigits hex bytes into a Randomness. +func RValueToRandomness(s string) (Randomness, error) { + if len(s) != NumHexDigits { + return Randomness{}, ErrRValueSize + } + + unsigned, err := strconv.ParseUint(s, hexBase, 64) + if err != nil { + return Randomness{}, err + } + + return Randomness{ + unsigned: unsigned, + }, nil +} + +// RValue formats the r-value encoding. +func (rnd Randomness) RValue() string { + // The important part here is to format a full 14-byte hex + // string, including leading zeros. We could accomplish the + // same with custom code or with fmt.Sprintf directives, but + // here we let strconv.FormatUint fill in leading zeros, as + // follows: + // + // Format (numRandomnessValues+Randomness) as a hex string + // Strip the leading hex digit, which is a "1" by design + // + // For example, a randomness that requires two leading zeros + // (all in hexadecimal): + // + // randomness is 7 bytes: aabbccddeeff + // numRandomnessValues is 2^56: 100000000000000 + // randomness+numRandomnessValues: 100aabbccddeeff + // strip the leading "1": 00aabbccddeeff + return strconv.FormatUint(numRandomnessValues+rnd.unsigned, hexBase)[1:] +} diff --git a/pkg/sampling/randomness_test.go b/pkg/sampling/randomness_test.go new file mode 100644 index 0000000000000..542629ca12bac --- /dev/null +++ b/pkg/sampling/randomness_test.go @@ -0,0 +1,38 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" + +import ( + "fmt" + + "go.opentelemetry.io/collector/pdata/pcommon" +) + +func ExampleTraceIDToRandomness() { + // TraceID represented in hex as "abababababababababd29d6a7215ced0" + var exampleTid = pcommon.TraceID{ + // 9 meaningless bytes + 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, 0xab, + // 7 bytes randomness + 0xd2, 0x9d, 0x6a, 0x72, 0x15, 0xce, 0xd0, + } + rnd := TraceIDToRandomness(exampleTid) + + fmt.Printf("TraceIDToRandomness(%q).RValue() = %s", exampleTid, rnd.RValue()) + + // Output: TraceIDToRandomness("abababababababababd29d6a7215ced0").RValue() = d29d6a7215ced0 +} + +func ExampleRValueToRandomness() { + // Any 14 hex digits is a valid R-value. + const exampleRvalue = "d29d6a7215ced0" + + // This converts to the internal unsigned integer representation. + rnd, _ := RValueToRandomness(exampleRvalue) + + // The result prints the same as the input. + fmt.Printf("RValueToRandomness(%q).RValue() = %s", exampleRvalue, rnd.RValue()) + + // Output: RValueToRandomness("d29d6a7215ced0").RValue() = d29d6a7215ced0 +} diff --git a/pkg/sampling/threshold.go b/pkg/sampling/threshold.go new file mode 100644 index 0000000000000..af39be34cddf9 --- /dev/null +++ b/pkg/sampling/threshold.go @@ -0,0 +1,116 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" + +import ( + "errors" + "strconv" + "strings" +) + +const ( + // MaxAdjustedCount is 2^56 i.e. 0x100000000000000 i.e., 1<<56. + MaxAdjustedCount = 1 << 56 + + // NumHexDigits is the number of hex digits equalling 56 bits. + // This is the limit of sampling precision. + NumHexDigits = 56 / hexBits + + hexBits = 4 + hexBase = 16 +) + +// Threshold represents an exact sampling probability using 56 bits of +// precision. A Threshold expresses the number of spans, out of 2**56, +// that are rejected. +// +// These 56 bits are compared against 56 bits of randomness, either +// extracted from an R-value or a TraceID having the W3C-specified +// randomness bit set. +// +// Because Thresholds store 56 bits of information and floating point +// values store 52 bits of significand, some conversions between +// Threshold and probability values are lossy. The kinds of loss that +// occur depend on where in the probability scale it happens, as the +// step between adjacent floating point values adjusts with the exponent. +type Threshold struct { + // unsigned is in the range [0, MaxAdjustedCount] + // - 0 represents always sampling (0 Random values are less-than) + // - 1 represents sampling 1-in-(MaxAdjustedCount-1) + // - MaxAdjustedCount represents always sampling 1-in- + unsigned uint64 +} + +var ( + // ErrTValueSize is returned for t-values longer than NumHexDigits hex digits. + ErrTValueSize = errors.New("t-value exceeds 14 hex digits") + + // ErrEmptyTValue indicates no t-value was found, i.e., no threshold available. + ErrTValueEmpty = errors.New("t-value is empty") + + // AlwaysSampleThreshold represents 100% sampling. + AlwaysSampleThreshold = Threshold{unsigned: 0} +) + +// TValueToThreshold returns a Threshold. Because TValue strings +// have trailing zeros omitted, this function performs the reverse. +func TValueToThreshold(s string) (Threshold, error) { + if len(s) > NumHexDigits { + return AlwaysSampleThreshold, ErrTValueSize + } + if len(s) == 0 { + return AlwaysSampleThreshold, ErrTValueEmpty + } + + // Having checked length above, there are no range errors + // possible. Parse the hex string to an unsigned value. + unsigned, err := strconv.ParseUint(s, hexBase, 64) + if err != nil { + return AlwaysSampleThreshold, err // e.g. parse error + } + + // The unsigned value requires shifting to account for the + // trailing zeros that were omitted by the encoding (see + // TValue for the reverse). Compute the number to shift by: + extendByHexZeros := NumHexDigits - len(s) + return Threshold{ + unsigned: unsigned << (hexBits * extendByHexZeros), + }, nil +} + +// TValue encodes a threshold, which is a variable-length hex string +// up to 14 characters. The empty string is returned for 100% +// sampling. +func (th Threshold) TValue() string { + // Always-sample is a special case because TrimRight() below + // will trim it to the empty string, which represents no t-value. + if th == AlwaysSampleThreshold { + return "0" + } + // For thresholds other than the extremes, format a full-width + // (14 digit) unsigned value with leading zeros, then, remove + // the trailing zeros. Use the logic for (Randomness).RValue(). + digits := Randomness(th).RValue() + + // Remove trailing zeros. + return strings.TrimRight(digits, "0") +} + +// ShouldSample returns true when the span passes this sampler's +// consistent sampling decision. +func (th Threshold) ShouldSample(rnd Randomness) bool { + return rnd.unsigned >= th.unsigned +} + +// ThresholdGreater allows direct comparison of Threshold values. +// Greater thresholds equate with smaller sampling probabilities. +func ThresholdGreater(a, b Threshold) bool { + return a.unsigned > b.unsigned +} + +// ThresholdLessThan allows direct comparison of Threshold values. +// Smaller thresholds equate with greater sampling probabilities. +func ThresholdLessThan(a, b Threshold) bool { + return a.unsigned < b.unsigned +} diff --git a/pkg/sampling/threshold_test.go b/pkg/sampling/threshold_test.go new file mode 100644 index 0000000000000..bada769048746 --- /dev/null +++ b/pkg/sampling/threshold_test.go @@ -0,0 +1,68 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" + +import ( + "encoding/hex" + "fmt" + + "go.opentelemetry.io/collector/pdata/pcommon" +) + +// ExampleTValueToThreshold demonstrates how to convert a T-value +// string to a Threshold value. +func ExampleTValueToThreshold() { + // "c" corresponds with rejecting 3/4 traces (or 0xc out of + // 0x10), which is 25% sampling. + const exampleTvalue = "c" + + tval, _ := TValueToThreshold(exampleTvalue) + + fmt.Printf("Probability(%q) = %f", exampleTvalue, tval.Probability()) + + // Output: + // Probability("c") = 0.250000 +} + +// ExampleTValueToThreshold demonstrates how to calculate whether a +// Threshold, calculated from a T-value, should be sampled at a given +// probability. +func ExampleThreshold_ShouldSample() { + const exampleTvalue = "c" + const exampleRvalue = "d29d6a7215ced0" + + tval, _ := TValueToThreshold(exampleTvalue) + rval, _ := RValueToRandomness(exampleRvalue) + + fmt.Printf("TValueToThreshold(%q).ShouldSample(RValueToRandomness(%q) = %v", + tval.TValue(), rval.RValue(), tval.ShouldSample(rval)) + + // Output: + // TValueToThreshold("c").ShouldSample(RValueToRandomness("d29d6a7215ced0") = true +} + +// ExampleTValueToThreshold_traceid demonstrates how to calculate whether a +// Threshold, calculated from a T-value, should be sampled at a given +// probability. +func ExampleThreshold_ShouldSample_traceid() { + const exampleTvalue = "c" + + // The leading 9 bytes (18 hex digits) of the TraceID string + // are not used, only the trailing 7 bytes (14 hex digits, + // i.e., 56 bits) are used. Here, the dont-care digits are + // set to 0xab and the R-value is "bd29d6a7215ced0". + const exampleHexTraceID = "abababababababababd29d6a7215ced0" + var tid pcommon.TraceID + idbytes, _ := hex.DecodeString(exampleHexTraceID) + copy(tid[:], idbytes) + + tval, _ := TValueToThreshold(exampleTvalue) + rval := TraceIDToRandomness(tid) + + fmt.Printf("TValueToThreshold(%q).ShouldSample(TraceIDToRandomness(%q) = %v", + tval.TValue(), exampleHexTraceID, tval.ShouldSample(rval)) + + // Output: + // TValueToThreshold("c").ShouldSample(TraceIDToRandomness("abababababababababd29d6a7215ced0") = true +} diff --git a/pkg/sampling/w3ctracestate.go b/pkg/sampling/w3ctracestate.go new file mode 100644 index 0000000000000..a0ebac614d6a4 --- /dev/null +++ b/pkg/sampling/w3ctracestate.go @@ -0,0 +1,177 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package sampling // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling" + +import ( + "io" + "regexp" + "strconv" + "strings" +) + +// W3CTraceState represents the a parsed W3C `tracestate` header. +// +// This type receives and passes through `tracestate` fields defined +// by all vendors, while it parses and validates the +// [OpenTelemetryTraceState] field. After parsing the W3CTraceState, +// access the OpenTelemetry-defined fields using +// [W3CTraceState.OTelValue]. +type W3CTraceState struct { + // commonTraceState holds "extra" values (e.g., + // vendor-specific tracestate fields) which are propagated but + // not used by Sampling logic. + commonTraceState + + // otts stores OpenTelemetry-specified tracestate fields. + otts OpenTelemetryTraceState +} + +const ( + hardMaxNumPairs = 32 + hardMaxW3CLength = 1024 + hardMaxKeyLength = 256 + hardMaxTenantLength = 241 + hardMaxSystemLength = 14 + + otelVendorCode = "ot" + + // keyRegexp is not an exact test, it permits all the + // characters and then we check various conditions. + + // key = simple-key / multi-tenant-key + // simple-key = lcalpha 0*255( lcalpha / DIGIT / "_" / "-"/ "*" / "/" ) + // multi-tenant-key = tenant-id "@" system-id + // tenant-id = ( lcalpha / DIGIT ) 0*240( lcalpha / DIGIT / "_" / "-"/ "*" / "/" ) + // system-id = lcalpha 0*13( lcalpha / DIGIT / "_" / "-"/ "*" / "/" ) + // lcalpha = %x61-7A ; a-z + + lcAlphaRegexp = `[a-z]` + lcAlphanumPunctRegexp = `[a-z0-9\-\*/_]` + lcAlphanumRegexp = `[a-z0-9]` + multiTenantSep = `@` + tenantIDRegexp = lcAlphanumRegexp + lcAlphanumPunctRegexp + `*` + systemIDRegexp = lcAlphaRegexp + lcAlphanumPunctRegexp + `*` + multiTenantKeyRegexp = tenantIDRegexp + multiTenantSep + systemIDRegexp + simpleKeyRegexp = lcAlphaRegexp + lcAlphanumPunctRegexp + `*` + keyRegexp = `(?:(?:` + simpleKeyRegexp + `)|(?:` + multiTenantKeyRegexp + `))` + + // value = 0*255(chr) nblk-chr + // nblk-chr = %x21-2B / %x2D-3C / %x3E-7E + // chr = %x20 / nblk-chr + // + // Note the use of double-quoted strings in two places below. + // This is for \x expansion in these two cases. Also note + // \x2d is a hyphen character, so a quoted \ (i.e., \\\x2d) + // appears below. + valueNonblankCharRegexp = "[\x21-\x2b\\\x2d-\x3c\x3e-\x7e]" + valueCharRegexp = "[\x20-\x2b\\\x2d-\x3c\x3e-\x7e]" + valueRegexp = valueCharRegexp + `{0,255}` + valueNonblankCharRegexp + + // tracestate = list-member 0*31( OWS "," OWS list-member ) + // list-member = (key "=" value) / OWS + + owsCharSet = ` \t` + owsRegexp = `(?:[` + owsCharSet + `]*)` + w3cMemberRegexp = `(?:` + keyRegexp + `=` + valueRegexp + `)?` + + w3cOwsMemberOwsRegexp = `(?:` + owsRegexp + w3cMemberRegexp + owsRegexp + `)` + w3cCommaOwsMemberOwsRegexp = `(?:` + `,` + w3cOwsMemberOwsRegexp + `)` + + w3cTracestateRegexp = `^` + w3cOwsMemberOwsRegexp + w3cCommaOwsMemberOwsRegexp + `*$` + + // Note that fixed limits on tracestate size are captured above + // as '*' regular expressions, which allows the parser to exceed + // fixed limits, which are checked in code. This keeps the size + // of the compiled regexp reasonable. Some of the regexps above + // are too complex to expand e.g., 31 times. In the case of + // w3cTracestateRegexp, 32 elements are allowed, which means we + // want the w3cCommaOwsMemberOwsRegexp element to match at most + // 31 times, but this is checked in code. +) + +var ( + w3cTracestateRe = regexp.MustCompile(w3cTracestateRegexp) + + w3cSyntax = keyValueScanner{ + maxItems: hardMaxNumPairs, + trim: true, + separator: ',', + equality: '=', + } +) + +// NewW3CTraceState parses a W3C trace state, with special attention +// to the embedded OpenTelemetry trace state field. +func NewW3CTraceState(input string) (w3c W3CTraceState, _ error) { + if len(input) > hardMaxW3CLength { + return w3c, ErrTraceStateSize + } + + if !w3cTracestateRe.MatchString(input) { + return w3c, strconv.ErrSyntax + } + + err := w3cSyntax.scanKeyValues(input, func(key, value string) error { + if len(key) > hardMaxKeyLength { + return ErrTraceStateSize + } + if tenant, system, found := strings.Cut(key, multiTenantSep); found { + if len(tenant) > hardMaxTenantLength { + return ErrTraceStateSize + } + if len(system) > hardMaxSystemLength { + return ErrTraceStateSize + } + } + switch key { + case otelVendorCode: + var err error + w3c.otts, err = NewOpenTelemetryTraceState(value) + return err + default: + w3c.kvs = append(w3c.kvs, KV{ + Key: key, + Value: value, + }) + return nil + } + }) + return w3c, err +} + +// HasAnyValue indicates whether there are any values in this +// tracestate, including extra values. +func (w3c *W3CTraceState) HasAnyValue() bool { + return w3c.OTelValue().HasAnyValue() || len(w3c.ExtraValues()) != 0 +} + +// OTelValue returns the OpenTelemetry tracestate value. +func (w3c *W3CTraceState) OTelValue() *OpenTelemetryTraceState { + return &w3c.otts +} + +// Serialize encodes this tracestate object for use as a W3C +// tracestate header value. +func (w3c *W3CTraceState) Serialize(w io.StringWriter) error { + ser := serializer{writer: w} + cnt := 0 + sep := func() { + if cnt != 0 { + ser.write(",") + } + cnt++ + } + if w3c.otts.HasAnyValue() { + sep() + ser.write("ot=") + ser.check(w3c.otts.Serialize(w)) + } + for _, kv := range w3c.ExtraValues() { + sep() + ser.write(kv.Key) + ser.write("=") + ser.write(kv.Value) + } + return ser.err +} diff --git a/pkg/sampling/w3ctracestate_test.go b/pkg/sampling/w3ctracestate_test.go new file mode 100644 index 0000000000000..ec12774b9771b --- /dev/null +++ b/pkg/sampling/w3ctracestate_test.go @@ -0,0 +1,161 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package sampling + +import ( + "errors" + "fmt" + "strconv" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +// ExampleW3CTraceState_Serialize shows how to parse and print a W3C +// tracestate. +func ExampleW3CTraceState() { + // This tracestate value encodes two sections, "ot" from + // OpenTelemetry and "zz" from a vendor. + w3c, err := NewW3CTraceState("ot=th:c;rv:d29d6a7215ced0;pn:abc,zz=vendorcontent") + if err != nil { + panic(err) + } + ot := w3c.OTelValue() + + fmt.Println("T-Value:", ot.TValue()) + fmt.Println("R-Value:", ot.RValue()) + fmt.Println("OTel Extra:", ot.ExtraValues()) + fmt.Println("Other Extra:", w3c.ExtraValues()) + + // Output: + // T-Value: c + // R-Value: d29d6a7215ced0 + // OTel Extra: [{pn abc}] + // Other Extra: [{zz vendorcontent}] +} + +// ExampleW3CTraceState_Serialize shows how to modify and serialize a +// new W3C tracestate. +func ExampleW3CTraceState_Serialize() { + w3c, err := NewW3CTraceState("") + if err != nil { + panic(err) + } + // Suppose a parent context was unsampled, the child span has + // been sampled at 25%. The child's context should carry the + // T-value of "c", serialize as "ot=th:c". + th, err := ProbabilityToThreshold(0.25) + if err != nil { + panic(err) + } + + // The update uses both the Threshold and its encoded string + // value, since in some code paths the Threshold will have + // just been parsed from a T-value, and in other code paths + // the T-value will be precalculated. + err = w3c.OTelValue().UpdateTValueWithSampling(th, th.TValue()) + if err != nil { + panic(err) + } + + var buf strings.Builder + err = w3c.Serialize(&buf) + if err != nil { + panic(err) + } + + fmt.Println(buf.String()) + + // Output: + // ot=th:c +} + +func TestParseW3CTraceState(t *testing.T) { + type testCase struct { + in string + rval string + tval string + extra map[string]string + expectErr error + } + const ns = "" + for _, test := range []testCase{ + // correct cases, with various whitespace + {"ot=th:1", ns, "1", nil, nil}, + {" ot=th:1 ", ns, "1", nil, nil}, + {" ot=th:1,other=value ", ns, "1", map[string]string{ + "other": "value", + }, nil}, + {",,,", ns, ns, nil, nil}, + {" , ot=th:1, , other=value ", ns, "1", map[string]string{ + "other": "value", + }, nil}, + {"ot=th:100;rv:abcdabcdabcdff", "abcdabcdabcdff", "100", nil, nil}, + {" ot=th:100;rv:abcdabcdabcdff", "abcdabcdabcdff", "100", nil, nil}, + {"ot=th:100;rv:abcdabcdabcdff ", "abcdabcdabcdff", "100", nil, nil}, + {"ot=rv:11111111111111", "11111111111111", ns, nil, nil}, + {"ot=rv:ffffffffffffff,unknown=value,other=something", "ffffffffffffff", ns, map[string]string{ + "other": "something", + "unknown": "value", + }, nil}, + + // syntax errors + {"-1=2", ns, ns, nil, strconv.ErrSyntax}, // invalid key char + {"=", ns, ns, nil, strconv.ErrSyntax}, // invalid empty key + + // size errors + {strings.Repeat("x", hardMaxKeyLength+1) + "=v", ns, ns, nil, ErrTraceStateSize}, // too long simple key + {strings.Repeat("x", hardMaxTenantLength+1) + "@y=v", ns, ns, nil, ErrTraceStateSize}, // too long multitenant-id + {"y@" + strings.Repeat("x", hardMaxSystemLength+1) + "=v", ns, ns, nil, ErrTraceStateSize}, // too long system-id + {"x=" + strings.Repeat("y", hardMaxW3CLength-1), ns, ns, nil, ErrTraceStateSize}, + {strings.Repeat("x=y,", hardMaxNumPairs) + "x=y", ns, ns, nil, ErrTraceStateSize}, + } { + t.Run(testName(test.in), func(t *testing.T) { + w3c, err := NewW3CTraceState(test.in) + + if test.expectErr != nil { + require.True(t, errors.Is(err, test.expectErr), + "%q: not expecting %v wanted %v", test.in, err, test.expectErr, + ) + } else { + require.NoError(t, err, "%q", test.in) + } + if test.rval != ns { + require.True(t, w3c.OTelValue().HasAnyValue()) + require.True(t, w3c.HasAnyValue()) + require.Equal(t, test.rval, w3c.OTelValue().RValue()) + } else { + require.Equal(t, "", w3c.OTelValue().RValue()) + } + if test.tval != ns { + require.True(t, w3c.OTelValue().HasAnyValue()) + require.True(t, w3c.HasAnyValue()) + require.NotEqual(t, "", w3c.OTelValue().TValue()) + require.Equal(t, test.tval, w3c.OTelValue().TValue()) + } else { + require.Equal(t, "", w3c.OTelValue().TValue()) + } + if test.extra != nil { + require.True(t, w3c.HasAnyValue()) + actual := map[string]string{} + for _, kv := range w3c.ExtraValues() { + actual[kv.Key] = kv.Value + } + require.Equal(t, test.extra, actual) + } + + if test.expectErr != nil { + return + } + // on success Serialize() should not modify + // test by re-parsing + var w strings.Builder + require.NoError(t, w3c.Serialize(&w)) + cpy, err := NewW3CTraceState(w.String()) + require.NoError(t, err, "with %v", w.String()) + require.Equal(t, w3c, cpy, "with %v", w.String()) + }) + } +} diff --git a/versions.yaml b/versions.yaml index b1b51f4306085..461476187f9a7 100644 --- a/versions.yaml +++ b/versions.yaml @@ -127,6 +127,7 @@ module-sets: - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/resourcetotelemetry + - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/azure