-
Notifications
You must be signed in to change notification settings - Fork 567
feat(tracing): Port sample_rand to POTel
#4106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e53c7ec
5f3051e
d22d8a4
65935f5
bac69a7
b0e6d10
a482084
9b24fe5
49a3507
3d4953e
1332ebc
4fe4b2e
8d14de5
50db70f
67ca0e5
248306f
ab3d868
d52b361
fc610e3
541a83d
46e6f9b
b57ee5e
8d917f5
d2055d9
8523fde
948b926
66c4b6a
507c21f
8010e46
172acaf
2fe50f1
221bed4
3541762
d49038b
5c20b12
5db195a
1b118cf
8754c5e
8830e34
6abb38c
3246d38
6ef0ebd
a0f1a35
34cb097
be19ff4
9711c99
bd6312f
d9fe5f9
9fe8606
9380196
c3d0156
c18cdf0
185cc15
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -1,15 +1,19 @@ | ||||||
| import random | ||||||
| from decimal import Decimal | ||||||
| from typing import cast | ||||||
|
|
||||||
| from opentelemetry import trace | ||||||
| from opentelemetry.sdk.trace.sampling import Sampler, SamplingResult, Decision | ||||||
| from opentelemetry.trace.span import TraceState | ||||||
|
|
||||||
| import sentry_sdk | ||||||
| from sentry_sdk.tracing_utils import has_tracing_enabled | ||||||
| from sentry_sdk.tracing_utils import ( | ||||||
| _generate_sample_rand, | ||||||
| has_tracing_enabled, | ||||||
| ) | ||||||
| from sentry_sdk.utils import is_valid_sample_rate, logger | ||||||
| from sentry_sdk.integrations.opentelemetry.consts import ( | ||||||
| TRACESTATE_SAMPLED_KEY, | ||||||
| TRACESTATE_SAMPLE_RAND_KEY, | ||||||
| TRACESTATE_SAMPLE_RATE_KEY, | ||||||
| SentrySpanAttribute, | ||||||
| ) | ||||||
|
|
@@ -70,23 +74,40 @@ def get_parent_sample_rate(parent_context, trace_id): | |||||
| return None | ||||||
|
|
||||||
|
|
||||||
| def dropped_result(parent_span_context, attributes, sample_rate=None): | ||||||
| # type: (SpanContext, Attributes, Optional[float]) -> SamplingResult | ||||||
| # these will only be added the first time in a root span sampling decision | ||||||
| # if sample_rate is provided, it'll be updated in trace state | ||||||
| trace_state = parent_span_context.trace_state | ||||||
| def get_parent_sample_rand(parent_context, trace_id): | ||||||
| # type: (Optional[SpanContext], int) -> Optional[Decimal] | ||||||
| if parent_context is None: | ||||||
| return None | ||||||
|
|
||||||
| if TRACESTATE_SAMPLED_KEY not in trace_state: | ||||||
| trace_state = trace_state.add(TRACESTATE_SAMPLED_KEY, "false") | ||||||
| elif trace_state.get(TRACESTATE_SAMPLED_KEY) == "deferred": | ||||||
| trace_state = trace_state.update(TRACESTATE_SAMPLED_KEY, "false") | ||||||
| is_span_context_valid = parent_context is not None and parent_context.is_valid | ||||||
|
|
||||||
| if sample_rate is not None: | ||||||
| trace_state = trace_state.update(TRACESTATE_SAMPLE_RATE_KEY, str(sample_rate)) | ||||||
| if is_span_context_valid and parent_context.trace_id == trace_id: | ||||||
| parent_sample_rand = parent_context.trace_state.get(TRACESTATE_SAMPLE_RAND_KEY) | ||||||
| if parent_sample_rand is None: | ||||||
| return None | ||||||
|
|
||||||
| is_root_span = not ( | ||||||
| parent_span_context.is_valid and not parent_span_context.is_remote | ||||||
| return Decimal(parent_sample_rand) | ||||||
|
|
||||||
| return None | ||||||
|
|
||||||
|
|
||||||
| def dropped_result(span_context, attributes, sample_rate=None, sample_rand=None): | ||||||
| # type: (SpanContext, Attributes, Optional[float], Optional[Decimal]) -> SamplingResult | ||||||
| """ | ||||||
| React to a span getting unsampled and return a DROP SamplingResult. | ||||||
|
|
||||||
| Update the trace_state with the effective sampled, sample_rate and sample_rand, | ||||||
| record that we dropped the event for client report purposes, and return | ||||||
| an OTel SamplingResult with Decision.DROP. | ||||||
|
|
||||||
| See for more info about OTel sampling: | ||||||
| https://opentelemetry-python.readthedocs.io/en/latest/sdk/trace.sampling.html | ||||||
| """ | ||||||
| trace_state = _update_trace_state( | ||||||
| span_context, sampled=False, sample_rate=sample_rate, sample_rand=sample_rand | ||||||
| ) | ||||||
|
|
||||||
| is_root_span = not (span_context.is_valid and not span_context.is_remote) | ||||||
| if is_root_span: | ||||||
| # Tell Sentry why we dropped the transaction/root-span | ||||||
| client = sentry_sdk.get_client() | ||||||
|
|
@@ -108,19 +129,20 @@ def dropped_result(parent_span_context, attributes, sample_rate=None): | |||||
| ) | ||||||
|
|
||||||
|
|
||||||
| def sampled_result(span_context, attributes, sample_rate): | ||||||
| # type: (SpanContext, Attributes, Optional[float]) -> SamplingResult | ||||||
| # these will only be added the first time in a root span sampling decision | ||||||
| # if sample_rate is provided, it'll be updated in trace state | ||||||
| trace_state = span_context.trace_state | ||||||
| def sampled_result(span_context, attributes, sample_rate=None, sample_rand=None): | ||||||
sentrivana marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| # type: (SpanContext, Attributes, Optional[float], Optional[Decimal]) -> SamplingResult | ||||||
| """ | ||||||
| React to a span being sampled and return a sampled SamplingResult. | ||||||
|
|
||||||
| if TRACESTATE_SAMPLED_KEY not in trace_state: | ||||||
| trace_state = trace_state.add(TRACESTATE_SAMPLED_KEY, "true") | ||||||
| elif trace_state.get(TRACESTATE_SAMPLED_KEY) == "deferred": | ||||||
| trace_state = trace_state.update(TRACESTATE_SAMPLED_KEY, "true") | ||||||
| Update the trace_state with the effective sampled, sample_rate and sample_rand, | ||||||
| and return an OTel SamplingResult with Decision.RECORD_AND_SAMPLE. | ||||||
|
|
||||||
| if sample_rate is not None: | ||||||
| trace_state = trace_state.update(TRACESTATE_SAMPLE_RATE_KEY, str(sample_rate)) | ||||||
| See for more info about OTel sampling: | ||||||
| https://opentelemetry-python.readthedocs.io/en/latest/sdk/trace.sampling.html | ||||||
| """ | ||||||
| trace_state = _update_trace_state( | ||||||
| span_context, sampled=True, sample_rate=sample_rate, sample_rand=sample_rand | ||||||
| ) | ||||||
|
|
||||||
| return SamplingResult( | ||||||
| Decision.RECORD_AND_SAMPLE, | ||||||
|
|
@@ -129,6 +151,27 @@ def sampled_result(span_context, attributes, sample_rate): | |||||
| ) | ||||||
|
|
||||||
|
|
||||||
| def _update_trace_state(span_context, sampled, sample_rate=None, sample_rand=None): | ||||||
| # type: (SpanContext, bool, Optional[float], Optional[Decimal]) -> TraceState | ||||||
| trace_state = span_context.trace_state | ||||||
|
|
||||||
| sampled = "true" if sampled else "false" | ||||||
| if TRACESTATE_SAMPLED_KEY not in trace_state: | ||||||
| trace_state = trace_state.add(TRACESTATE_SAMPLED_KEY, sampled) | ||||||
| elif trace_state.get(TRACESTATE_SAMPLED_KEY) == "deferred": | ||||||
| trace_state = trace_state.update(TRACESTATE_SAMPLED_KEY, sampled) | ||||||
|
|
||||||
| if sample_rate is not None: | ||||||
| trace_state = trace_state.update(TRACESTATE_SAMPLE_RATE_KEY, str(sample_rate)) | ||||||
|
|
||||||
| if sample_rand is not None: | ||||||
| trace_state = trace_state.update( | ||||||
| TRACESTATE_SAMPLE_RAND_KEY, f"{sample_rand:.6f}" # noqa: E231 | ||||||
| ) | ||||||
|
|
||||||
|
Comment on lines
+167
to
+171
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [optional] since it seems like we are setting the sample_rand both here and above in If you think this change would be too much hassle, feel free to ignore this suggestion
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||
| return trace_state | ||||||
|
|
||||||
|
|
||||||
| class SentrySampler(Sampler): | ||||||
| def should_sample( | ||||||
| self, | ||||||
|
|
@@ -156,6 +199,18 @@ def should_sample( | |||||
|
|
||||||
| sample_rate = None | ||||||
|
|
||||||
| parent_sampled = get_parent_sampled(parent_span_context, trace_id) | ||||||
| parent_sample_rate = get_parent_sample_rate(parent_span_context, trace_id) | ||||||
| parent_sample_rand = get_parent_sample_rand(parent_span_context, trace_id) | ||||||
|
|
||||||
| if parent_sample_rand is not None: | ||||||
| # We have a sample_rand on the incoming trace or we already backfilled | ||||||
| # it in PropagationContext | ||||||
| sample_rand = parent_sample_rand | ||||||
| else: | ||||||
| # We are the head SDK and we need to generate a new sample_rand | ||||||
| sample_rand = cast(Decimal, _generate_sample_rand(str(trace_id), (0, 1))) | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This Could we please try to modify
Suggested change
|
||||||
|
|
||||||
| # Explicit sampled value provided at start_span | ||||||
| custom_sampled = cast( | ||||||
| "Optional[bool]", attributes.get(SentrySpanAttribute.CUSTOM_SAMPLED) | ||||||
|
|
@@ -165,11 +220,17 @@ def should_sample( | |||||
| sample_rate = float(custom_sampled) | ||||||
| if sample_rate > 0: | ||||||
| return sampled_result( | ||||||
| parent_span_context, attributes, sample_rate=sample_rate | ||||||
| parent_span_context, | ||||||
| attributes, | ||||||
| sample_rate=sample_rate, | ||||||
| sample_rand=sample_rand, | ||||||
| ) | ||||||
| else: | ||||||
| return dropped_result( | ||||||
| parent_span_context, attributes, sample_rate=sample_rate | ||||||
| parent_span_context, | ||||||
| attributes, | ||||||
| sample_rate=sample_rate, | ||||||
| sample_rand=sample_rand, | ||||||
| ) | ||||||
| else: | ||||||
| logger.debug( | ||||||
|
|
@@ -190,8 +251,6 @@ def should_sample( | |||||
| sample_rate_to_propagate = sample_rate | ||||||
| else: | ||||||
| # Check if there is a parent with a sampling decision | ||||||
| parent_sampled = get_parent_sampled(parent_span_context, trace_id) | ||||||
| parent_sample_rate = get_parent_sample_rate(parent_span_context, trace_id) | ||||||
| if parent_sampled is not None: | ||||||
| sample_rate = bool(parent_sampled) | ||||||
| sample_rate_to_propagate = ( | ||||||
|
|
@@ -215,17 +274,23 @@ def should_sample( | |||||
| if client.monitor.downsample_factor > 0: | ||||||
| sample_rate_to_propagate = sample_rate | ||||||
|
|
||||||
| # Roll the dice on sample rate | ||||||
| # Compare sample_rand to sample_rate to make the final sampling decision | ||||||
| sample_rate = float(cast("Union[bool, float, int]", sample_rate)) | ||||||
| sampled = random.random() < sample_rate | ||||||
| sampled = sample_rand < sample_rate | ||||||
|
|
||||||
| if sampled: | ||||||
| return sampled_result( | ||||||
| parent_span_context, attributes, sample_rate=sample_rate_to_propagate | ||||||
| parent_span_context, | ||||||
| attributes, | ||||||
| sample_rate=sample_rate_to_propagate, | ||||||
| sample_rand=None if sample_rand == parent_sample_rand else sample_rand, | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [question] What is the purpose of comparing
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ngl I am still a bit confused. Didn't we get rid of the code that would potentially change the precision?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We got rid of the rounding code but not the precision code, we still format
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In any case, the bigger picture here is just: if we have a halfway valid incoming sample_rand, don't tamper with it, even if it'd mean just adding a few zeroes at the end.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. okay, fair enough |
||||||
| ) | ||||||
| else: | ||||||
| return dropped_result( | ||||||
| parent_span_context, attributes, sample_rate=sample_rate_to_propagate | ||||||
| parent_span_context, | ||||||
| attributes, | ||||||
| sample_rate=sample_rate_to_propagate, | ||||||
| sample_rand=None if sample_rand == parent_sample_rand else sample_rand, | ||||||
| ) | ||||||
|
|
||||||
| def get_description(self) -> str: | ||||||
|
|
||||||
Uh oh!
There was an error while loading. Please reload this page.