Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions src/sentry/search/events/datasets/discover.py
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,64 @@ def function_converter(self) -> Mapping[str, SnQLFunction]:
default_result_type="number",
private=True,
),
SnQLFunction(
"fn_span_exclusive_time",
Comment thread
udameli marked this conversation as resolved.
required_args=[
SnQLStringArg("spans_op", True, True),
SnQLStringArg("spans_group"),
SnQLStringArg("fn"),
],
snql_column=lambda args, alias: Function(
args["fn"],
[
Function(
"arrayJoin",
[
Function(
"arrayFilter",
[
Lambda(
[
"x",

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure, but is x required if we're not using it in the lambda?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it was mostly because arrayFilter returns the first column, so exclusive_time has to be selected as x here

"y",
"z",
],
Function(
"and",
[
Function(
"equals",
[
Identifier("y"),
args["spans_op"],
],
),
Function(
"equals",
[
Identifier(
"z",
),
args["spans_group"],
],
),
],
),
),
Column("spans.exclusive_time"),
Column("spans.op"),
Column("spans.group"),
],
)
],
"exclusive_time",
)
],
alias,
),
default_result_type="number",
private=True,
),
]
}

Expand Down
90 changes: 81 additions & 9 deletions src/sentry/snuba/discover.py
Original file line number Diff line number Diff line change
Expand Up @@ -1362,20 +1362,19 @@ def spans_histogram_query(
"""
multiplier = int(10 ** precision)
if max_value is not None:
# We want the specified max_value to be exclusive, and the queried max_value
# to be inclusive. So we adjust the specified max_value using the multiplier.
max_value -= 0.1 / multiplier

# TODO add min max calculation after
# min_value, max_value = find_histogram_min_max(
# fields, min_value, max_value, user_query, params, data_filter, use_snql
# )
min_value, max_value = find_span_histogram_min_max(
span, min_value, max_value, user_query, params, data_filter
)

key_column = None
field_names = []
histogram_rows = None

# TODO calculate histogram_params
# histogram_params = find_histogram_params(num_buckets, min_value, max_value, multiplier)
histogram_params = HistogramParams(num_buckets=100, bucket_size=1, start_offset=0, multiplier=1)
histogram_params = find_histogram_params(num_buckets, min_value, max_value, multiplier)
histogram_column = get_span_histogram_column(span, histogram_params)

builder = HistogramQueryBuilder(
Expand All @@ -1401,7 +1400,7 @@ def spans_histogram_query(
if not normalize_results:
return results

return normalize_span_histogram_resutls(span, histogram_params, results)
return normalize_span_histogram_results(span, histogram_params, results)


def histogram_query(
Expand Down Expand Up @@ -1640,6 +1639,79 @@ def find_histogram_params(num_buckets, min_value, max_value, multiplier):
return HistogramParams(num_buckets, bucket_size, start_offset, multiplier)


def find_span_histogram_min_max(span, min_value, max_value, user_query, params, data_filter=None):

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we be merging this with the original? or decompose the original into functions which they can both compose from? It feels like a lot of this code is almost identical minus the columns/span info and lack of data filter?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah I've noticed it too. I can follow up with a clean up

"""
Find the min/max value of the specified span. If either min/max is already
specified, it will be used and not queried for.

:param [Span] span: A span for which you want to generate the histograms for.
:param float min_value: The minimum value allowed to be in the histogram.
If left unspecified, it is queried using `user_query` and `params`.
:param float max_value: The maximum value allowed to be in the histogram.
If left unspecified, it is queried using `user_query` and `params`.
:param str user_query: Filter query string to create conditions from.
:param {str: str} params: Filtering parameters with start, end, project_id, environment
:param str data_filter: Indicate the filter strategy to be applied to the data.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You aren't using data_filter.. yet?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not yet, I was gonna get more context on it before implementing it. I wonder under what circumstances do we want to exclude outliers for spans histogram. Doesn't it make sense to just show all data?

"""

if min_value is not None and max_value is not None:
return min_value, max_value

selected_columns = []
min_column = ""
max_column = ""
if min_value is None:
min_column = f'fn_span_exclusive_time("{span.op}", {span.group}, min)'
selected_columns.append(min_column)
if max_value is None:
max_column = f'fn_span_exclusive_time("{span.op}", {span.group}, max)'
selected_columns.append(max_column)

referrer = "api.organization-spans-histogram-min-max"
builder = QueryBuilder(
dataset=Dataset.Discover,
params=params,
selected_columns=selected_columns,
query=user_query,
limit=1,
functions_acl=["fn_span_exclusive_time"],
)

results = builder.run_query(referrer)
data = results.get("data")

# there should be exactly 1 row in the results, but if something went wrong here,
# we force the min/max to be None to coerce an empty histogram
if data is None or len(data) != 1:
return None, None

row = data[0]

if min_value is None:
calculated_min_value = row[get_function_alias(min_column)]
min_value = calculated_min_value if calculated_min_value else None
if max_value is not None and min_value is not None:
# max_value was provided by the user, and min_value was queried.
# If min_value > max_value, then we adjust min_value with respect to
# max_value. The rationale is that if the user provided max_value,
# then any and all data above max_value should be ignored since it is
# and upper bound.
min_value = min([max_value, min_value])

if max_value is None:
calculated_max_value = row[get_function_alias(max_column)]
max_value = calculated_max_value if calculated_max_value else None

if max_value is not None and min_value is not None:
# min_value may be either queried or provided by the user. max_value was queried.
# If min_value > max_value, then max_value should be adjusted with respect to
# min_value, since min_value is a lower bound, and any and all data below
# min_value should be ignored.
max_value = max([max_value, min_value])

return min_value, max_value


def find_histogram_min_max(
fields, min_value, max_value, user_query, params, data_filter=None, use_snql=False
):
Expand Down Expand Up @@ -1743,7 +1815,7 @@ def find_histogram_min_max(
return min_value, max_value


def normalize_span_histogram_resutls(span, histogram_params, results):
def normalize_span_histogram_results(span, histogram_params, results):
"""
Normalizes the span histogram results by renaming the columns to key and bin
and make sure to zerofill any missing values.
Expand Down
Loading