Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/sentry/api/serializers/models/exporteddata.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,5 @@ def serialize(self, obj, attrs, user, **kwargs):
"status": obj.status,
"checksum": checksum,
"fileName": file_name,
"export_format": obj.export_format,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

API response field uses inconsistent naming convention

Medium Severity

The new export_format key uses snake_case, while every other multi-word key in this serializer response uses camelCase (dateCreated, dateFinished, dateExpired, fileName). This inconsistency will be visible to API consumers and breaks the established naming convention. It likely needs to be "exportFormat" to match the rest of the response.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit d43ba64. Configure here.

}
8 changes: 8 additions & 0 deletions src/sentry/data_export/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,19 @@ class ExportQueryType:
ISSUES_BY_TAG = 0
DISCOVER = 1
EXPLORE = 2
TRACE_ITEM_FULL_EXPORT = 3
ISSUES_BY_TAG_STR = "Issues-by-Tag"
DISCOVER_STR = "Discover"
EXPLORE_STR = "Explore"
TRACE_ITEM_FULL_EXPORT_STR = "trace_item_full_export"

@classmethod
def as_choices(cls) -> tuple[tuple[int, str], ...]:
    """Return ``(integer value, display string)`` pairs for every export query type.

    Suitable for use as Django model/field ``choices``.
    """
    pairs = [
        (cls.ISSUES_BY_TAG, str(cls.ISSUES_BY_TAG_STR)),
        (cls.DISCOVER, str(cls.DISCOVER_STR)),
        (cls.EXPLORE, str(cls.EXPLORE_STR)),
        (cls.TRACE_ITEM_FULL_EXPORT, str(cls.TRACE_ITEM_FULL_EXPORT_STR)),
    ]
    return tuple(pairs)

@classmethod
Expand All @@ -47,6 +50,7 @@ def as_str_choices(cls) -> tuple[tuple[str, str], ...]:
(cls.ISSUES_BY_TAG_STR, cls.ISSUES_BY_TAG_STR),
(cls.DISCOVER_STR, cls.DISCOVER_STR),
(cls.EXPLORE_STR, cls.EXPLORE_STR),
(cls.TRACE_ITEM_FULL_EXPORT_STR, cls.TRACE_ITEM_FULL_EXPORT_STR),
)

@classmethod
Expand All @@ -57,6 +61,8 @@ def as_str(cls, integer: int) -> str:
return cls.DISCOVER_STR
elif integer == cls.EXPLORE:
return cls.EXPLORE_STR
elif integer == cls.TRACE_ITEM_FULL_EXPORT:
return cls.TRACE_ITEM_FULL_EXPORT_STR
raise ValueError(f"Invalid ExportQueryType: {integer}")

@classmethod
Expand All @@ -67,4 +73,6 @@ def from_str(cls, string: str) -> int:
return cls.DISCOVER
elif string == cls.EXPLORE_STR:
return cls.EXPLORE
elif string == cls.TRACE_ITEM_FULL_EXPORT_STR:
return cls.TRACE_ITEM_FULL_EXPORT
raise ValueError(f"Invalid ExportQueryType: {string}")
99 changes: 50 additions & 49 deletions src/sentry/data_export/endpoints/data_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,10 @@ def _validate_dataset(self, query_type: str, query_info: dict[str, Any]) -> dict
dataset = dataset or "discover"
if dataset not in SUPPORTED_DATASETS:
raise serializers.ValidationError(f"{dataset} is not supported for exports")
elif query_type == ExportQueryType.EXPLORE_STR:
elif query_type in (
ExportQueryType.EXPLORE_STR,
ExportQueryType.TRACE_ITEM_FULL_EXPORT_STR,
):
if not dataset:
raise serializers.ValidationError(
f"Please specify dataset. Supported datasets for this query type are {str(SUPPORTED_TRACE_ITEM_DATASETS.keys())}."
Expand All @@ -78,27 +81,20 @@ def _validate_dataset(self, query_type: str, query_info: dict[str, Any]) -> dict
query_info["dataset"] = dataset
return query_info

def _validate_query_info(
self, query_type: str, query_info: dict[str, Any], *, export_format: str
) -> dict[str, Any]:
def _validate_query_info(self, query_type: str, query_info: dict[str, Any]) -> dict[str, Any]:
base_fields = query_info.get("field")
if base_fields is None:
base_fields = []
elif not isinstance(base_fields, list):
base_fields = [base_fields]

is_jsonl_trace_item_full_export = (
query_type == ExportQueryType.EXPLORE_STR
and export_format == OutputMode.JSONL.value
and len(base_fields) == 0
)
is_jsonl_trace_item_full_export = query_type == ExportQueryType.TRACE_ITEM_FULL_EXPORT_STR
Comment thread
cursor[bot] marked this conversation as resolved.
Comment thread
cursor[bot] marked this conversation as resolved.

if len(base_fields) > MAX_FIELDS:
detail = f"You can export up to {MAX_FIELDS} fields at a time. Please delete some and try again."
raise serializers.ValidationError(detail)
elif len(base_fields) == 0:
if not is_jsonl_trace_item_full_export:
raise serializers.ValidationError("at least one field is required to export")
elif len(base_fields) == 0 and not is_jsonl_trace_item_full_export:
raise serializers.ValidationError("at least one field is required to export")

if "query" not in query_info:
if is_jsonl_trace_item_full_export:
Expand Down Expand Up @@ -136,7 +132,10 @@ def _validate_query_info(
query_info["start"] = start.isoformat()
query_info["end"] = end.isoformat()

if query_type == ExportQueryType.EXPLORE_STR:
if (
query_type == ExportQueryType.EXPLORE_STR
or query_type == ExportQueryType.TRACE_ITEM_FULL_EXPORT_STR
):
sort = query_info.get("sort", [])
if sort and isinstance(sort, str):
sort = [sort]
Expand Down Expand Up @@ -173,9 +172,7 @@ def validate(self, data: dict[str, Any]) -> dict[str, Any]:
export_format = data.get("format", OutputMode.CSV.value)

if query_type == ExportQueryType.DISCOVER_STR:
query_info = self._validate_query_info(
query_type, query_info, export_format=export_format
)
query_info = self._validate_query_info(query_type, query_info)
query_info = self._validate_dataset(query_type, query_info)
dataset = query_info["dataset"]

Expand Down Expand Up @@ -210,43 +207,44 @@ def validate(self, data: dict[str, Any]) -> dict[str, Any]:
raise serializers.ValidationError("Invalid search query.")

elif query_type == ExportQueryType.EXPLORE_STR:
query_info = self._validate_query_info(
query_type, query_info, export_format=export_format
)
query_info = self._validate_query_info(query_type, query_info)
query_info = self._validate_dataset(query_type, query_info)
explore_output_mode = OutputMode.from_value(export_format)
is_full_jsonl_trace_item_export = (
export_format == OutputMode.JSONL.value and len(query_info.get("field", [])) == 0
)
if not is_full_jsonl_trace_item_export:
try:
explore_processor = ExploreProcessor(
explore_query=query_info,
organization=organization,
output_mode=explore_output_mode,
)
sort = query_info.get("sort", [])
orderby = [sort] if isinstance(sort, str) else sort

explore_processor.validate_export_query(
rpc_dataset_common.TableQuery(
query_string=query_info["query"],
selected_columns=query_info["field"],
orderby=orderby,
offset=0,
limit=1,
referrer=Referrer.DATA_EXPORT_TASKS_EXPLORE,
sampling_mode=explore_processor.sampling_mode,
resolver=explore_processor.search_resolver,
equations=query_info.get("equations", []),
)
try:
explore_processor = ExploreProcessor(
explore_query=query_info,
organization=organization,
output_mode=explore_output_mode,
)
sort = query_info.get("sort", [])
orderby = [sort] if isinstance(sort, str) else sort

explore_processor.validate_export_query(
rpc_dataset_common.TableQuery(
query_string=query_info["query"],
selected_columns=query_info["field"],
orderby=orderby,
offset=0,
limit=1,
referrer=Referrer.DATA_EXPORT_TASKS_EXPLORE,
Comment thread
manessaraj marked this conversation as resolved.
sampling_mode=explore_processor.sampling_mode,
resolver=explore_processor.search_resolver,
equations=query_info.get("equations", []),
)
except InvalidSearchQuery as err:
sentry_sdk.capture_exception(err)
raise serializers.ValidationError("Invalid table query.")
)
except InvalidSearchQuery as err:
sentry_sdk.capture_exception(err)
raise serializers.ValidationError("Invalid table query.")
elif query_type == ExportQueryType.TRACE_ITEM_FULL_EXPORT_STR:
query_info = self._validate_query_info(query_type, query_info)
query_info = self._validate_dataset(query_type, query_info)
Comment thread
sentry[bot] marked this conversation as resolved.
explore_output_mode = OutputMode.from_value(export_format)
if explore_output_mode != OutputMode.JSONL:
raise serializers.ValidationError("For full export, output mode must be JSONL.")

elif data["query_type"] == ExportQueryType.ISSUES_BY_TAG_STR:
issues_by_tag_validate(query_info)
data["query_info"] = query_info
return data


Expand Down Expand Up @@ -287,7 +285,7 @@ def _parse_limit(self, data: dict[str, Any]) -> tuple[int | None, bool]:
run_sync = (
limit is not None
and limit <= MAX_SYNC_LIMIT
and data["query_type"] == ExportQueryType.EXPLORE_STR
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sync execution removed for EXPLORE queries on logs

Low Severity

The run_sync condition was changed from EXPLORE_STR to only TRACE_ITEM_FULL_EXPORT_STR, which means regular EXPLORE queries on the logs dataset with a small limit no longer run synchronously. Elsewhere in this PR, when behavior was shared between the two types, both were included (e.g., feature checks, dataset validation, sort handling). This replacement instead of addition looks inconsistent with the rest of the PR's pattern.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit d43ba64. Configure here.

and data["query_type"] == ExportQueryType.TRACE_ITEM_FULL_EXPORT_STR
Comment thread
manessaraj marked this conversation as resolved.
Comment thread
cursor[bot] marked this conversation as resolved.
and data["query_info"].get("dataset") == "logs"
)
return limit, run_sync
Expand All @@ -308,7 +306,10 @@ def post(self, request: Request, organization: Organization) -> Response:
# The data export feature is only available alongside `discover-query` (except for explore).
# So to export issue tags, they must have have `discover-query`
if not features.has("organizations:discover-query", organization):
if request.data.get("query_type") != ExportQueryType.EXPLORE_STR:
if request.data.get("query_type") not in {
ExportQueryType.EXPLORE_STR,
ExportQueryType.TRACE_ITEM_FULL_EXPORT_STR,
}:
return Response(status=404)

# Get environment_id and limit if available
Expand Down
42 changes: 19 additions & 23 deletions src/sentry/data_export/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,6 @@ def _export_metric_tags(data_export: ExportedData) -> dict[str, str]:
}


def _is_full_jsonl_trace_item_export(data_export: ExportedData, output_mode: OutputMode) -> bool:
    """Return True when *data_export* is a "full" JSONL trace-item export.

    A full export is an EXPLORE query emitted as JSONL with no explicit field
    list (an empty or absent ``field`` entry in ``query_info``).
    """
    if data_export.query_type != ExportQueryType.EXPLORE:
        return False
    if output_mode != OutputMode.JSONL:
        return False
    requested_fields = data_export.query_info.get("field", [])
    return len(requested_fields) == 0


def _page_token_b64_from_processor(
processor: IssuesByTagProcessor | DiscoverProcessor | ExploreProcessor,
) -> str | None:
Expand Down Expand Up @@ -464,25 +456,26 @@ def get_processor(
organization=data_export.organization,
)
elif data_export.query_type == ExportQueryType.EXPLORE:
if _is_full_jsonl_trace_item_export(data_export, output_mode):
Comment thread
cursor[bot] marked this conversation as resolved.
page_token: bytes | None = None
if page_token_b64:
try:
page_token = base64.b64decode(page_token_b64)
except (ValueError, TypeError) as e:
raise ExportError("Invalid export trace item pagination state.") from e
return TraceItemFullExportProcessor(
explore_query=data_export.query_info,
organization=data_export.organization,
output_mode=output_mode,
page_token=page_token,
last_emitted_item_id_hex=last_emitted_item_id_hex,
)
return ExploreProcessor(
explore_query=data_export.query_info,
organization=data_export.organization,
output_mode=output_mode,
)
elif data_export.query_type == ExportQueryType.TRACE_ITEM_FULL_EXPORT:
page_token: bytes | None = None
if page_token_b64:
try:
page_token = base64.b64decode(page_token_b64)
except (ValueError, TypeError) as e:
raise ExportError("Invalid export trace item pagination state.") from e
return TraceItemFullExportProcessor(
explore_query=data_export.query_info,
organization=data_export.organization,
output_mode=output_mode,
page_token=page_token,
last_emitted_item_id_hex=last_emitted_item_id_hex,
)

else:
raise ExportError(f"No processor found for this query type: {data_export.query_type}")
except ExportError as error:
Expand All @@ -502,7 +495,10 @@ def process_rows(
rows = process_issues_by_tag(processor, batch_size, offset)
elif data_export.query_type == ExportQueryType.DISCOVER:
rows = process_discover(processor, batch_size, offset)
elif data_export.query_type == ExportQueryType.EXPLORE:
elif (
data_export.query_type == ExportQueryType.EXPLORE
or data_export.query_type == ExportQueryType.TRACE_ITEM_FULL_EXPORT
):
rows = process_explore(processor, batch_size, offset)
else:
raise ExportError(f"No processor found for this query type: {data_export.query_type}")
Expand Down
3 changes: 3 additions & 0 deletions tests/sentry/data_export/endpoints/test_data_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from sentry.data_export.base import ExportQueryType, ExportStatus
from sentry.data_export.models import ExportedData
from sentry.data_export.writers import OutputMode
from sentry.search.utils import parse_datetime_string
from sentry.testutils.cases import APITestCase
from sentry.testutils.helpers.datetime import freeze_time
Expand Down Expand Up @@ -102,6 +103,7 @@ def test_new_export(self) -> None:
},
"status": ExportStatus.Early,
"checksum": None,
"export_format": OutputMode.CSV.value,
"fileName": None,
}

Expand Down Expand Up @@ -132,6 +134,7 @@ def test_progress_export(self) -> None:
},
"status": data_export.status,
"checksum": None,
"export_format": OutputMode.CSV.value,
"fileName": None,
}

Expand Down
4 changes: 2 additions & 2 deletions tests/sentry/data_export/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -867,7 +867,7 @@ def _explore_logs_jsonl_rich_field_api_request_body(
self, start: str, end: str, *, limit: int | None = None
) -> dict[str, Any]:
body: dict[str, Any] = {
"query_type": ExportQueryType.EXPLORE_STR,
"query_type": ExportQueryType.TRACE_ITEM_FULL_EXPORT_STR,
"format": OutputMode.JSONL.value,
"query_info": {
"project": [self.project.id],
Expand Down Expand Up @@ -906,7 +906,7 @@ def _assert_explore_logs_jsonl_export_create_payload(
) -> ExportedData:
de = ExportedData.objects.get(id=payload["id"])
assert de.user_id == self.user.id
assert de.query_type == ExportQueryType.EXPLORE
assert de.query_type == ExportQueryType.TRACE_ITEM_FULL_EXPORT
assert de.export_format == OutputMode.JSONL.value
assert de.query_info["dataset"] == "logs"
return de
Expand Down
Loading