From 5a45008b49597200c2c56d7f3949b8f1f06cfa7f Mon Sep 17 00:00:00 2001 From: Michelle Zhang <56095982+michellewzhang@users.noreply.github.com> Date: Fri, 14 Nov 2025 10:33:46 -0800 Subject: [PATCH 1/4] fix(replay): validate replay start/end for summaries --- .../endpoints/project_replay_summary.py | 11 ++- .../endpoints/test_project_replay_summary.py | 86 +++++++++++++++++++ 2 files changed, 94 insertions(+), 3 deletions(-) diff --git a/src/sentry/replays/endpoints/project_replay_summary.py b/src/sentry/replays/endpoints/project_replay_summary.py index 3dfec583f1039e..caab35da24fdbb 100644 --- a/src/sentry/replays/endpoints/project_replay_summary.py +++ b/src/sentry/replays/endpoints/project_replay_summary.py @@ -194,10 +194,15 @@ def post(self, request: Request, project: Project, replay_id: str) -> Response: status=404, ) - # We expect the start and end times to be present, and error + respond 500 if they're not. + # Extract start and end times from the replay, falling back to 90-day defaults if missing. replay = process_raw_response(snuba_response, fields=[])[0] - replay_start = datetime.fromisoformat(replay.get("started_at") or "") - replay_end = datetime.fromisoformat(replay.get("finished_at") or "") + default_start, default_end = default_start_end_dates() + + started_at = replay.get("started_at") + replay_start = datetime.fromisoformat(started_at) if started_at else default_start + + finished_at = replay.get("finished_at") + replay_end = datetime.fromisoformat(finished_at) if finished_at else default_end return self.make_seer_request( SEER_START_TASK_ENDPOINT_PATH, diff --git a/tests/sentry/replays/endpoints/test_project_replay_summary.py b/tests/sentry/replays/endpoints/test_project_replay_summary.py index b2e1eadaf0f931..2f9b6466ba9e69 100644 --- a/tests/sentry/replays/endpoints/test_project_replay_summary.py +++ b/tests/sentry/replays/endpoints/test_project_replay_summary.py @@ -7,6 +7,7 @@ from django.conf import settings from django.urls import reverse +from sentry.api.utils import default_start_end_dates from sentry.replays.endpoints.project_replay_summary import ( SEER_POLL_STATE_ENDPOINT_PATH, SEER_START_TASK_ENDPOINT_PATH, @@ -144,6 +145,91 @@ def test_post_simple(self, mock_make_seer_api_request: Mock) -> None: assert request_body["project_id"] == self.project.id assert request_body["temperature"] is None + @patch("sentry.replays.endpoints.project_replay_summary.process_raw_response") + @patch("sentry.replays.endpoints.project_replay_summary.query_replay_instance") + @patch("sentry.replays.endpoints.project_replay_summary.make_signed_seer_api_request") + def test_post_with_only_start_timestamp_missing( + self, + mock_make_seer_api_request: Mock, + mock_query_replay_instance: Mock, + mock_process_raw_response: Mock, + ) -> None: + """Test that when only started_at is None, we use default for start but keep the actual end.""" + mock_make_seer_api_request.return_value = MockSeerResponse( + 200, json_data={"hello": "world"} + ) + mock_query_replay_instance.return_value = [{"data": "mock"}] + + specific_end = datetime.now(UTC) - timedelta(days=1) + mock_process_raw_response.return_value = [ + { + "started_at": None, + "finished_at": specific_end.isoformat(), + } + ] + + with self.feature(self.features): + response = self.client.post( + self.url, data={"num_segments": 2}, content_type="application/json" + ) + + assert response.status_code == 200 + call_args = mock_make_seer_api_request.call_args + request_body = json.loads(call_args[1]["body"].decode()) + + default_start, default_end = default_start_end_dates() + + # Verify start uses default but end uses actual value + assert abs( + datetime.fromisoformat(request_body["replay_start"]) - default_start + ) <= timedelta(seconds=1) + assert abs(datetime.fromisoformat(request_body["replay_end"]) - specific_end) <= timedelta( + seconds=1 + ) + + @patch("sentry.replays.endpoints.project_replay_summary.process_raw_response") + @patch("sentry.replays.endpoints.project_replay_summary.query_replay_instance") + @patch("sentry.replays.endpoints.project_replay_summary.make_signed_seer_api_request") + def test_post_with_only_end_timestamp_missing( + self, + mock_make_seer_api_request: Mock, + mock_query_replay_instance: Mock, + mock_process_raw_response: Mock, + ) -> None: + """Test that when only finished_at is None, we use default for end but keep the actual start.""" + mock_make_seer_api_request.return_value = MockSeerResponse( + 200, json_data={"hello": "world"} + ) + mock_query_replay_instance.return_value = [{"data": "mock"}] + + # Set a specific start time but no end time + specific_start = datetime.now(UTC) - timedelta(days=2) + mock_process_raw_response.return_value = [ + { + "started_at": specific_start.isoformat(), + "finished_at": None, + } + ] + + with self.feature(self.features): + response = self.client.post( + self.url, data={"num_segments": 2}, content_type="application/json" + ) + + assert response.status_code == 200 + call_args = mock_make_seer_api_request.call_args + request_body = json.loads(call_args[1]["body"].decode()) + + default_start, default_end = default_start_end_dates() + + # Verify start uses actual value but end uses default + assert abs( + datetime.fromisoformat(request_body["replay_start"]) - specific_start + ) <= timedelta(seconds=1) + assert abs(datetime.fromisoformat(request_body["replay_end"]) - default_end) <= timedelta( + seconds=1 + ) + def test_post_replay_not_found(self) -> None: with self.feature(self.features): response = self.client.post( From a19a619e83b27a280673f0dae5f5a0ea0eb8cdae Mon Sep 17 00:00:00 2001 From: Michelle Zhang <56095982+michellewzhang@users.noreply.github.com> Date: Fri, 14 Nov 2025 10:45:58 -0800 Subject: [PATCH 2/4] :recycle: pass none --- .../endpoints/project_replay_summary.py | 5 +- .../endpoints/test_project_replay_summary.py | 86 ------------------- 2 files changed, 2 insertions(+), 89 deletions(-) diff --git a/src/sentry/replays/endpoints/project_replay_summary.py b/src/sentry/replays/endpoints/project_replay_summary.py index caab35da24fdbb..5dc36fa6433202 100644 --- a/src/sentry/replays/endpoints/project_replay_summary.py +++ b/src/sentry/replays/endpoints/project_replay_summary.py @@ -196,13 +196,12 @@ def post(self, request: Request, project: Project, replay_id: str) -> Response: # Extract start and end times from the replay, falling back to 90-day defaults if missing. replay = process_raw_response(snuba_response, fields=[])[0] - default_start, default_end = default_start_end_dates() started_at = replay.get("started_at") - replay_start = datetime.fromisoformat(started_at) if started_at else default_start + replay_start = datetime.fromisoformat(started_at) if started_at else None finished_at = replay.get("finished_at") - replay_end = datetime.fromisoformat(finished_at) if finished_at else default_end + replay_end = datetime.fromisoformat(finished_at) if finished_at else None return self.make_seer_request( SEER_START_TASK_ENDPOINT_PATH, diff --git a/tests/sentry/replays/endpoints/test_project_replay_summary.py b/tests/sentry/replays/endpoints/test_project_replay_summary.py index 2f9b6466ba9e69..b2e1eadaf0f931 100644 --- a/tests/sentry/replays/endpoints/test_project_replay_summary.py +++ b/tests/sentry/replays/endpoints/test_project_replay_summary.py @@ -7,7 +7,6 @@ from django.conf import settings from django.urls import reverse -from sentry.api.utils import default_start_end_dates from sentry.replays.endpoints.project_replay_summary import ( SEER_POLL_STATE_ENDPOINT_PATH, SEER_START_TASK_ENDPOINT_PATH, @@ -145,91 +144,6 @@ def test_post_simple(self, mock_make_seer_api_request: Mock) -> None: assert request_body["project_id"] == self.project.id assert request_body["temperature"] is None - @patch("sentry.replays.endpoints.project_replay_summary.process_raw_response") - @patch("sentry.replays.endpoints.project_replay_summary.query_replay_instance") - @patch("sentry.replays.endpoints.project_replay_summary.make_signed_seer_api_request") - def test_post_with_only_start_timestamp_missing( - self, - mock_make_seer_api_request: Mock, - mock_query_replay_instance: Mock, - mock_process_raw_response: Mock, - ) -> None: - """Test that when only started_at is None, we use default for start but keep the actual end.""" - mock_make_seer_api_request.return_value = MockSeerResponse( - 200, json_data={"hello": "world"} - ) - mock_query_replay_instance.return_value = [{"data": "mock"}] - - specific_end = datetime.now(UTC) - timedelta(days=1) - mock_process_raw_response.return_value = [ - { - "started_at": None, - "finished_at": specific_end.isoformat(), - } - ] - - with self.feature(self.features): - response = self.client.post( - self.url, data={"num_segments": 2}, content_type="application/json" - ) - - assert response.status_code == 200 - call_args = mock_make_seer_api_request.call_args - request_body = json.loads(call_args[1]["body"].decode()) - - default_start, default_end = default_start_end_dates() - - # Verify start uses default but end uses actual value - assert abs( - datetime.fromisoformat(request_body["replay_start"]) - default_start - ) <= timedelta(seconds=1) - assert abs(datetime.fromisoformat(request_body["replay_end"]) - specific_end) <= timedelta( - seconds=1 - ) - - @patch("sentry.replays.endpoints.project_replay_summary.process_raw_response") - @patch("sentry.replays.endpoints.project_replay_summary.query_replay_instance") - @patch("sentry.replays.endpoints.project_replay_summary.make_signed_seer_api_request") - def test_post_with_only_end_timestamp_missing( - self, - mock_make_seer_api_request: Mock, - mock_query_replay_instance: Mock, - mock_process_raw_response: Mock, - ) -> None: - """Test that when only finished_at is None, we use default for end but keep the actual start.""" - mock_make_seer_api_request.return_value = MockSeerResponse( - 200, json_data={"hello": "world"} - ) - mock_query_replay_instance.return_value = [{"data": "mock"}] - - # Set a specific start time but no end time - specific_start = datetime.now(UTC) - timedelta(days=2) - mock_process_raw_response.return_value = [ - { - "started_at": specific_start.isoformat(), - "finished_at": None, - } - ] - - with self.feature(self.features): - response = self.client.post( - self.url, data={"num_segments": 2}, content_type="application/json" - ) - - assert response.status_code == 200 - call_args = mock_make_seer_api_request.call_args - request_body = json.loads(call_args[1]["body"].decode()) - - default_start, default_end = default_start_end_dates() - - # Verify start uses actual value but end uses default - assert abs( - datetime.fromisoformat(request_body["replay_start"]) - specific_start - ) <= timedelta(seconds=1) - assert abs(datetime.fromisoformat(request_body["replay_end"]) - default_end) <= timedelta( - seconds=1 - ) - def test_post_replay_not_found(self) -> None: with self.feature(self.features): response = self.client.post( From 4ce0a08cb0f3dba98d089dcfce3dc3be32bed616 Mon Sep 17 00:00:00 2001 From: Michelle Zhang <56095982+michellewzhang@users.noreply.github.com> Date: Fri, 14 Nov 2025 10:50:18 -0800 Subject: [PATCH 3/4] :recycle: simplify --- .../replays/endpoints/project_replay_summary.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/sentry/replays/endpoints/project_replay_summary.py b/src/sentry/replays/endpoints/project_replay_summary.py index 5dc36fa6433202..c1863ec3393185 100644 --- a/src/sentry/replays/endpoints/project_replay_summary.py +++ b/src/sentry/replays/endpoints/project_replay_summary.py @@ -1,5 +1,4 @@ import logging -from datetime import datetime from typing import Any import sentry_sdk @@ -194,21 +193,15 @@ def post(self, request: Request, project: Project, replay_id: str) -> Response: status=404, ) - # Extract start and end times from the replay, falling back to 90-day defaults if missing. + # Extract start and end times from the replay (pass None if missing). replay = process_raw_response(snuba_response, fields=[])[0] - started_at = replay.get("started_at") - replay_start = datetime.fromisoformat(started_at) if started_at else None - - finished_at = replay.get("finished_at") - replay_end = datetime.fromisoformat(finished_at) if finished_at else None - return self.make_seer_request( SEER_START_TASK_ENDPOINT_PATH, { "replay_id": replay_id, - "replay_start": replay_start.isoformat(), - "replay_end": replay_end.isoformat(), + "replay_start": replay.get("started_at"), + "replay_end": replay.get("finished_at"), "num_segments": num_segments, "organization_id": project.organization.id, "project_id": project.id, From b639ccfddff4922b92596c6bd6d538567ba497fe Mon Sep 17 00:00:00 2001 From: Michelle Zhang <56095982+michellewzhang@users.noreply.github.com> Date: Fri, 14 Nov 2025 10:56:31 -0800 Subject: [PATCH 4/4] :art: ref --- .../endpoints/project_replay_summary.py | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/src/sentry/replays/endpoints/project_replay_summary.py b/src/sentry/replays/endpoints/project_replay_summary.py index c1863ec3393185..542da6aed0784f 100644 --- a/src/sentry/replays/endpoints/project_replay_summary.py +++ b/src/sentry/replays/endpoints/project_replay_summary.py @@ -1,4 +1,5 @@ import logging +from datetime import datetime from typing import Any import sentry_sdk @@ -193,15 +194,39 @@ def post(self, request: Request, project: Project, replay_id: str) -> Response: status=404, ) - # Extract start and end times from the replay (pass None if missing). + # Extract start and end times from the replay (pass None if missing or invalid). replay = process_raw_response(snuba_response, fields=[])[0] + def validate_iso_timestamp(timestamp: str | None) -> str | None: + """Validate that timestamp is a valid ISO format string, return None if invalid.""" + if not timestamp: + return None + try: + datetime.fromisoformat(timestamp) + return timestamp + except (ValueError, TypeError): + return None + + replay_start = validate_iso_timestamp(replay.get("started_at")) + replay_end = validate_iso_timestamp(replay.get("finished_at")) + + if not replay_start or not replay_end: + logger.warning( + "Replay start or end time missing or invalid.", + extra={ + "started_at": replay.get("started_at"), + "finished_at": replay.get("finished_at"), + "replay_id": replay_id, + "organization_id": project.organization.id, + }, + ) + return self.make_seer_request( SEER_START_TASK_ENDPOINT_PATH, { "replay_id": replay_id, - "replay_start": replay.get("started_at"), - "replay_end": replay.get("finished_at"), + "replay_start": replay_start, + "replay_end": replay_end, "num_segments": num_segments, "organization_id": project.organization.id, "project_id": project.id,