From 0233e278f36a8810ef92dc79e5e574d3dec93580 Mon Sep 17 00:00:00 2001
From: Tony Xiao <txiao@sentry.io>
Date: Wed, 1 Feb 2023 10:33:52 -0500
Subject: [PATCH] ref(profiling): Do not send single sample profiles (#1879)

Single sample profiles are dropped in relay so there's no reason to send them to
begin with. Save the extra bytes by just not sending it.
---
 sentry_sdk/profiler.py                        | 28 +++++++++---
 sentry_sdk/tracing.py                         |  2 +-
 tests/integrations/django/asgi/test_asgi.py   | 44 +++++++++++--------
 tests/integrations/fastapi/test_fastapi.py    |  6 +++
 .../integrations/starlette/test_starlette.py  |  1 +
 tests/integrations/wsgi/test_wsgi.py          |  1 +
 tests/test_profiler.py                        | 38 ++++++++++++++--
 7 files changed, 91 insertions(+), 29 deletions(-)

diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py
index 2f1f0f8ab5..84bdaec05e 100644
--- a/sentry_sdk/profiler.py
+++ b/sentry_sdk/profiler.py
@@ -135,14 +135,18 @@ def is_gevent():
 
 _scheduler = None  # type: Optional[Scheduler]
 
+# The default sampling frequency to use. This is set at 101 in order to
+# mitigate the effects of lockstep sampling.
+DEFAULT_SAMPLING_FREQUENCY = 101
+
+
+# The minimum number of unique samples that must exist in a profile to be
+# considered valid.
+PROFILE_MINIMUM_SAMPLES = 2
+
 
 def setup_profiler(options):
     # type: (Dict[str, Any]) -> bool
-    """
-    `buffer_secs` determines the max time a sample will be buffered for
-    `frequency` determines the number of samples to take per second (Hz)
-    """
-
     global _scheduler
 
     if _scheduler is not None:
@@ -153,7 +157,7 @@ def setup_profiler(options):
         logger.warn("profiling is only supported on Python >= 3.3")
         return False
 
-    frequency = 101
+    frequency = DEFAULT_SAMPLING_FREQUENCY
 
     if is_gevent():
         # If gevent has patched the threading modules then we cannot rely on
@@ -429,6 +433,8 @@ def __init__(
         self.stacks = []  # type: List[ProcessedStack]
         self.samples = []  # type: List[ProcessedSample]
 
+        self.unique_samples = 0
+
         transaction._profile = self
 
     def update_active_thread_id(self):
@@ -540,6 +546,8 @@ def write(self, ts, sample):
             self.stop()
             return
 
+        self.unique_samples += 1
+
         elapsed_since_start_ns = str(offset)
 
         for tid, (stack_id, stack) in sample:
@@ -641,6 +649,14 @@ def to_json(self, event_opt, options):
             ],
         }
 
+    def valid(self):
+        # type: () -> bool
+        return (
+            self.sampled is not None
+            and self.sampled
+            and self.unique_samples >= PROFILE_MINIMUM_SAMPLES
+        )
+
 
 class Scheduler(object):
     mode = "unknown"
diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py
index 0e3cb97036..332b3a0c18 100644
--- a/sentry_sdk/tracing.py
+++ b/sentry_sdk/tracing.py
@@ -716,7 +716,7 @@ def finish(self, hub=None, end_timestamp=None):
             "spans": finished_spans,
         }  # type: Event
 
-        if self._profile is not None and self._profile.sampled:
+        if self._profile is not None and self._profile.valid():
             event["profile"] = self._profile
             contexts.update({"profile": self._profile.get_profile_context()})
             self._profile = None
diff --git a/tests/integrations/django/asgi/test_asgi.py b/tests/integrations/django/asgi/test_asgi.py
index 3e8a79b763..d7ea06d85a 100644
--- a/tests/integrations/django/asgi/test_asgi.py
+++ b/tests/integrations/django/asgi/test_asgi.py
@@ -7,6 +7,11 @@
 from sentry_sdk.integrations.django import DjangoIntegration
 from tests.integrations.django.myapp.asgi import channels_application
 
+try:
+    from unittest import mock  # python 3.3 and above
+except ImportError:
+    import mock  # python < 3.3
+
 APPS = [channels_application]
 if django.VERSION >= (3, 0):
     from tests.integrations.django.myapp.asgi import asgi_application
@@ -81,32 +86,33 @@ async def test_async_views(sentry_init, capture_events, application):
 async def test_active_thread_id(
     sentry_init, capture_envelopes, teardown_profiling, endpoint, application
 ):
-    sentry_init(
-        integrations=[DjangoIntegration()],
-        traces_sample_rate=1.0,
-        _experiments={"profiles_sample_rate": 1.0},
-    )
+    with mock.patch("sentry_sdk.profiler.PROFILE_MINIMUM_SAMPLES", 0):
+        sentry_init(
+            integrations=[DjangoIntegration()],
+            traces_sample_rate=1.0,
+            _experiments={"profiles_sample_rate": 1.0},
+        )
 
-    envelopes = capture_envelopes()
+        envelopes = capture_envelopes()
 
-    comm = HttpCommunicator(application, "GET", endpoint)
-    response = await comm.get_response()
-    assert response["status"] == 200, response["body"]
+        comm = HttpCommunicator(application, "GET", endpoint)
+        response = await comm.get_response()
+        assert response["status"] == 200, response["body"]
 
-    await comm.wait()
+        await comm.wait()
 
-    data = json.loads(response["body"])
+        data = json.loads(response["body"])
 
-    envelopes = [envelope for envelope in envelopes]
-    assert len(envelopes) == 1
+        envelopes = [envelope for envelope in envelopes]
+        assert len(envelopes) == 1
 
-    profiles = [item for item in envelopes[0].items if item.type == "profile"]
-    assert len(profiles) == 1
+        profiles = [item for item in envelopes[0].items if item.type == "profile"]
+        assert len(profiles) == 1
 
-    for profile in profiles:
-        transactions = profile.payload.json["transactions"]
-        assert len(transactions) == 1
-        assert str(data["active"]) == transactions[0]["active_thread_id"]
+        for profile in profiles:
+            transactions = profile.payload.json["transactions"]
+            assert len(transactions) == 1
+            assert str(data["active"]) == transactions[0]["active_thread_id"]
 
 
 @pytest.mark.asyncio
diff --git a/tests/integrations/fastapi/test_fastapi.py b/tests/integrations/fastapi/test_fastapi.py
index 7d3aa3ffbd..17b1cecd52 100644
--- a/tests/integrations/fastapi/test_fastapi.py
+++ b/tests/integrations/fastapi/test_fastapi.py
@@ -12,6 +12,11 @@
 from sentry_sdk.integrations.starlette import StarletteIntegration
 from sentry_sdk.integrations.asgi import SentryAsgiMiddleware
 
+try:
+    from unittest import mock  # python 3.3 and above
+except ImportError:
+    import mock  # python < 3.3
+
 
 def fastapi_app_factory():
     app = FastAPI()
@@ -155,6 +160,7 @@ def test_legacy_setup(
 
 
 @pytest.mark.parametrize("endpoint", ["/sync/thread_ids", "/async/thread_ids"])
+@mock.patch("sentry_sdk.profiler.PROFILE_MINIMUM_SAMPLES", 0)
 def test_active_thread_id(sentry_init, capture_envelopes, teardown_profiling, endpoint):
     sentry_init(
         traces_sample_rate=1.0,
diff --git a/tests/integrations/starlette/test_starlette.py b/tests/integrations/starlette/test_starlette.py
index 5e4b071235..03cb270049 100644
--- a/tests/integrations/starlette/test_starlette.py
+++ b/tests/integrations/starlette/test_starlette.py
@@ -846,6 +846,7 @@ def test_legacy_setup(
 
 
 @pytest.mark.parametrize("endpoint", ["/sync/thread_ids", "/async/thread_ids"])
+@mock.patch("sentry_sdk.profiler.PROFILE_MINIMUM_SAMPLES", 0)
 def test_active_thread_id(sentry_init, capture_envelopes, teardown_profiling, endpoint):
     sentry_init(
         traces_sample_rate=1.0,
diff --git a/tests/integrations/wsgi/test_wsgi.py b/tests/integrations/wsgi/test_wsgi.py
index 2aed842d3f..4f9886c6f6 100644
--- a/tests/integrations/wsgi/test_wsgi.py
+++ b/tests/integrations/wsgi/test_wsgi.py
@@ -287,6 +287,7 @@ def sample_app(environ, start_response):
 @pytest.mark.skipif(
     sys.version_info < (3, 3), reason="Profiling is only supported in Python >= 3.3"
 )
+@mock.patch("sentry_sdk.profiler.PROFILE_MINIMUM_SAMPLES", 0)
 def test_profile_sent(
     sentry_init,
     capture_envelopes,
diff --git a/tests/test_profiler.py b/tests/test_profiler.py
index 56f3470335..227d538084 100644
--- a/tests/test_profiler.py
+++ b/tests/test_profiler.py
@@ -1,5 +1,4 @@
 import inspect
-import mock
 import os
 import sys
 import threading
@@ -21,6 +20,11 @@
 from sentry_sdk.tracing import Transaction
 from sentry_sdk._queue import Queue
 
+try:
+    from unittest import mock  # python 3.3 and above
+except ImportError:
+    import mock  # python < 3.3
+
 try:
     import gevent
 except ImportError:
@@ -88,6 +92,7 @@ def test_profiler_setup_twice(teardown_profiling):
         pytest.param(None, 0, id="profiler not enabled"),
     ],
 )
+@mock.patch("sentry_sdk.profiler.PROFILE_MINIMUM_SAMPLES", 0)
 def test_profiled_transaction(
     sentry_init,
     capture_envelopes,
@@ -115,6 +120,7 @@ def test_profiled_transaction(
     assert len(items["profile"]) == profile_count
 
 
+@mock.patch("sentry_sdk.profiler.PROFILE_MINIMUM_SAMPLES", 0)
 def test_profile_context(
     sentry_init,
     capture_envelopes,
@@ -145,6 +151,32 @@ def test_profile_context(
     }
 
 
+def test_minimum_unique_samples_required(
+    sentry_init,
+    capture_envelopes,
+    teardown_profiling,
+):
+    sentry_init(
+        traces_sample_rate=1.0,
+        _experiments={"profiles_sample_rate": 1.0},
+    )
+
+    envelopes = capture_envelopes()
+
+    with start_transaction(name="profiling"):
+        pass
+
+    items = defaultdict(list)
+    for envelope in envelopes:
+        for item in envelope.items:
+            items[item.type].append(item)
+
+    assert len(items["transaction"]) == 1
+    # because we dont leave any time for the profiler to
+    # take any samples, it should be not be sent
+    assert len(items["profile"]) == 0
+
+
 def get_frame(depth=1):
     """
     This function is not exactly true to its name. Depending on
@@ -478,7 +510,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
         pytest.param(GeventScheduler, marks=requires_gevent, id="gevent scheduler"),
     ],
 )
-@mock.patch("sentry_sdk.profiler.MAX_PROFILE_DURATION_NS", int(1))
+@mock.patch("sentry_sdk.profiler.MAX_PROFILE_DURATION_NS", 1)
 def test_max_profile_duration_reached(scheduler_class):
     sample = [
         (
@@ -792,7 +824,7 @@ def test_max_profile_duration_reached(scheduler_class):
         pytest.param(GeventScheduler, marks=requires_gevent, id="gevent scheduler"),
     ],
 )
-@mock.patch("sentry_sdk.profiler.MAX_PROFILE_DURATION_NS", int(5))
+@mock.patch("sentry_sdk.profiler.MAX_PROFILE_DURATION_NS", 5)
 def test_profile_processing(
     DictionaryContaining,  # noqa: N803
     scheduler_class,