Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions src/sentry/replays/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,75 @@ def query_replay_instance(
)["data"]


def query_replay_id_by_prefix(
    project_ids: list[int],
    replay_id_prefix: str,
    start: datetime,
    end: datetime,
    organization: Organization | None = None,
) -> str | None:
    """
    Resolve a replay ID prefix to the first full replay ID found in the given
    time range and project list.

    The date range is chunked into 14 day windows, scanned newest to oldest,
    to avoid query timeouts.

    TODO: This query ignores the replay_id column index and can do large scans. At the moment it's only used for the Seer Explorer replay details tool.
    This is a good candidate for optimization, which can be done with a materialized string column for the first 8 chars, and a secondary index.
    Alternatively we can try more consistent ways of passing the full ID to Explorer.

    Args:
        project_ids: Project IDs to search.
        replay_id_prefix: Hex prefix of the replay ID, 8-31 characters.
            Case-insensitive; normalized to lowercase before querying.
        start: Inclusive lower bound of the timestamp range.
        end: Exclusive upper bound of the timestamp range.
        organization: If provided, used only to populate the Snuba tenant ID.

    Returns:
        The first matching full replay ID, or None if the prefix is invalid
        or no match is found in the range.
    """

    # Enforce length of 8-31 characters. A 32-character string is a full ID
    # and should be queried directly rather than via a prefix scan.
    if not (8 <= len(replay_id_prefix) < 32):
        return None

    # Enforce valid hex chars. Note: int(s, 16) is too lenient for this --
    # it also accepts surrounding whitespace, a leading sign, a "0x" prefix,
    # and digit-group underscores -- so check each character explicitly.
    replay_id_prefix = replay_id_prefix.lower()
    if not all(c in "0123456789abcdef" for c in replay_id_prefix):
        return None

    window_size = timedelta(days=14)
    window_end = end
    while window_end > start:
        window_start = max(window_end - window_size, start)

        query = Query(
            match=Entity("replays"),
            select=[Column("replay_id")],
            where=[
                Condition(Column("project_id"), Op.IN, project_ids),
                Condition(
                    Function(
                        "startsWith",
                        parameters=[
                            # replay_id is a UUID column; stringify so the
                            # prefix comparison is done on the hex text form.
                            Function("toString", parameters=[Column("replay_id")]),
                            replay_id_prefix,
                        ],
                    ),
                    Op.EQ,
                    1,
                ),
                Condition(Column("timestamp"), Op.GTE, window_start),
                Condition(Column("timestamp"), Op.LT, window_end),
            ],
            granularity=Granularity(3600),
            limit=Limit(1),
        )

        snuba_response = execute_query(
            query=query,
            tenant_id={"organization_id": organization.id} if organization else {},
            referrer="replays.query.short_id_details_query",
        )["data"]

        if snuba_response:
            return str(snuba_response[0]["replay_id"])

        # Slide the window back in time; windows are half-open [start, end).
        window_end = window_start

    return None


def query_replay_viewed_by_ids(
project_id: int | list[int],
replay_id: str,
Expand Down Expand Up @@ -973,6 +1042,7 @@ def get_replay_range(
project_id: int,
replay_id: str,
) -> tuple[datetime, datetime] | None:
"""Get the min and max timestamps for a replay. This query is redundant if you're already using query_replay_instance - use the started_at and finished_at fields instead."""
query = Query(
match=Entity("replays"),
select=[
Expand Down
101 changes: 73 additions & 28 deletions src/sentry/seer/explorer/tools.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import logging
import uuid
from datetime import UTC, datetime, timedelta, timezone
from typing import Any, Literal
from typing import Any, Literal, cast

from django.urls import reverse

from sentry import eventstore
from sentry import eventstore, features
from sentry.api import client
from sentry.api.serializers.base import serialize
from sentry.api.serializers.models.event import EventSerializer, IssueEventSerializerResponse
Expand All @@ -16,6 +15,8 @@
from sentry.models.organization import Organization
from sentry.models.project import Project
from sentry.models.repository import Repository
from sentry.replays.post_process import process_raw_response
from sentry.replays.query import query_replay_id_by_prefix, query_replay_instance
from sentry.search.eap.types import SearchResolverConfig
from sentry.search.events.types import SnubaParams
from sentry.seer.autofix.autofix import get_all_tags_overview
Expand Down Expand Up @@ -199,6 +200,9 @@ def get_trace_waterfall(trace_id: str, organization_id: int) -> EAPTrace | None:

# Get full trace id if a short id is provided. Queries EAP for a single span.
# Use sliding 14-day windows starting from most recent, up to 90 days in the past, to avoid timeouts.
# TODO: This query ignores the trace_id column index and can do large scans, and is a good candidate for optimization.
# This can be done with a materialized string column for the first 8 chars and a secondary index.
# Alternatively we can try more consistent ways of passing the full ID to Explorer.
if len(trace_id) < 32:
full_trace_id = None
now = datetime.now(timezone.utc)
Expand Down Expand Up @@ -515,15 +519,15 @@ def get_replay_metadata(
*,
replay_id: str,
organization_id: int,
project_id: int | None = None,
project_slug: str | None = None,
) -> dict[str, Any] | None:
"""
Get the metadata for a replay through an aggregate replay event query.

Args:
replay_id: The ID of the replay.
        replay_id: The ID of the replay. Either a valid UUID or an 8-character hex string prefix. If known, the full ID is recommended for performance.
organization_id: The ID of the organization the replay belongs to.
project_id: The projects to query. If not provided, all projects in the organization will be queried.
project_slug: The slug of the project to query. If not provided, all projects in the organization will be queried.

Returns:
A dict containing the metadata for the replay, or None if it's not found.
Expand All @@ -538,38 +542,79 @@ def get_replay_metadata(
)
return None

path = reverse(
"sentry-api-0-organization-replay-details",
args=(organization.slug, replay_id),
if not features.has("organizations:session-replay", organization):
return None

p_ids_and_slugs = list(
Project.objects.filter(
organization_id=organization.id,
status=ObjectStatus.ACTIVE,
**({"slug": project_slug} if project_slug else {}),
).values_list("id", "slug")
)
path = path.strip("/")[len("api/0") :] + "/"

params = {}
if project_id:
params["project"] = project_id
if not p_ids_and_slugs:
logger.warning(
"No projects found for given organization and project slug",
extra={"organization_id": organization_id, "project_slug": project_slug},
)
return None

resp = client.get(
auth=ApiKey(organization_id=organization.id, scope_list=["org:read", "project:read"]),
user=None,
path=path,
params=params,
)
start, end = default_start_end_dates()

if resp.status_code != 200 or not (resp.data or {}).get("data"):
if len(replay_id) < 32:
# Subquery for the full replay ID.
full_replay_id = query_replay_id_by_prefix(
project_ids=[id for id, _ in p_ids_and_slugs],
replay_id_prefix=replay_id,
start=start,
end=end,
organization=organization,
)
if not full_replay_id:
logger.warning(
"Replay short ID lookup failed",
extra={"replay_id": replay_id, "organization_id": organization_id},
)
return None

replay_id = full_replay_id

try:
replay_id = str(
uuid.UUID(replay_id)
) # Normalizing with dashes is recommended for the query.
except ValueError:
logger.warning(
"Failed to get replay metadata",
"Invalid replay ID", extra={"replay_id": replay_id, "organization_id": organization_id}
)
return None

snuba_response = query_replay_instance(
project_id=[id for id, _ in p_ids_and_slugs],
replay_id=replay_id,
start=start,
end=end,
organization=organization,
request_user_id=None,
)
response = process_raw_response(
snuba_response,
fields=[],
)
if not response:
logger.warning(
"Replay instance not found - no data returned from query",
extra={
"replay_id": replay_id,
"organization_id": organization_id,
"project_id": project_id,
"status_code": resp.status_code,
},
)
return None

# Add project_slug field.
result = resp.data["data"]
project = Project.objects.get(id=result["project_id"])
result["project_slug"] = project.slug

result = cast(dict[str, Any], response[0])
result["project_slug"] = next(
filter(lambda x: x[0] == int(result["project_id"]), p_ids_and_slugs)
)[1]
return result
Loading
Loading