From c6f5cd2accbc76043116802a4dc965b1d443e403 Mon Sep 17 00:00:00 2001
From: Colin Chartier
Date: Fri, 8 Mar 2024 10:16:12 -0500
Subject: [PATCH] Change strip PII semantics

---
 sentry_sdk/consts.py                     |   4 +-
 sentry_sdk/integrations/openai.py        |  10 +--
 tests/integrations/openai/test_openai.py | 108 ++++++++++++++++-------
 3 files changed, 85 insertions(+), 37 deletions(-)

diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
index ce38ebeef9..e4edfddef1 100644
--- a/sentry_sdk/consts.py
+++ b/sentry_sdk/consts.py
@@ -219,8 +219,8 @@ class OP:
     MIDDLEWARE_STARLITE = "middleware.starlite"
     MIDDLEWARE_STARLITE_RECEIVE = "middleware.starlite.receive"
     MIDDLEWARE_STARLITE_SEND = "middleware.starlite.send"
-    OPENAI_CHAT_COMPLETIONS_CREATE = "openai.chat_completions.create"
-    OPENAI_EMBEDDINGS_CREATE = "openai.embeddings.create"
+    OPENAI_CHAT_COMPLETIONS_CREATE = "ai.chat_completions.create.openai"
+    OPENAI_EMBEDDINGS_CREATE = "ai.embeddings.create.openai"
     QUEUE_SUBMIT_ARQ = "queue.submit.arq"
     QUEUE_TASK_ARQ = "queue.task.arq"
     QUEUE_SUBMIT_CELERY = "queue.submit.celery"
diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 56d20613db..5c05a43916 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -50,7 +50,7 @@ def count_tokens(s):
 class OpenAIIntegration(Integration):
     identifier = "openai"
 
-    def __init__(self, include_prompts=False):
+    def __init__(self, include_prompts=True):
         # type: (OpenAIIntegration, bool) -> None
         self.include_prompts = include_prompts
 
@@ -159,13 +159,13 @@ def new_chat_completion(*args, **kwargs):
             raise e from None
 
         with capture_internal_exceptions():
-            if _should_send_default_pii() or integration.include_prompts:
+            if _should_send_default_pii() and integration.include_prompts:
                 span.set_data("ai.input_messages", messages)
             span.set_data("ai.model_id", model)
             span.set_data("ai.streaming", streaming)
 
             if hasattr(res, "choices"):
-                if _should_send_default_pii() or integration.include_prompts:
+                if _should_send_default_pii() and integration.include_prompts:
                     span.set_data(
                         "ai.responses", list(map(lambda x: x.message, res.choices))
                     )
@@ -198,7 +198,7 @@ def new_iterator():
                             )
                             if (
                                 _should_send_default_pii()
-                                or integration.include_prompts
+                                and integration.include_prompts
                             ):
                                 span.set_data("ai.responses", all_responses)
                             _calculate_chat_completion_usage(
@@ -235,7 +235,7 @@ def new_embeddings_create(*args, **kwargs):
             description="OpenAI Embedding Creation",
         ) as span:
             if "input" in kwargs and (
-                _should_send_default_pii() or integration.include_prompts
+                _should_send_default_pii() and integration.include_prompts
             ):
                 if isinstance(kwargs["input"], str):
                     span.set_data("ai.input_messages", [kwargs["input"]])
diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index 761f605168..d710d2208a 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -17,33 +17,39 @@
 from unittest import mock  # python 3.3 and above
 
 
+EXAMPLE_CHAT_COMPLETION = ChatCompletion(
+    id="chat-id",
+    choices=[
+        Choice(
+            index=0,
+            finish_reason="stop",
+            message=ChatCompletionMessage(
+                role="assistant", content="the model response"
+            ),
+        )
+    ],
+    created=10000000,
+    model="model-id",
+    object="chat.completion",
+    usage=CompletionUsage(
+        completion_tokens=10,
+        prompt_tokens=20,
+        total_tokens=30,
+    ),
+)
+
+
 def test_nonstreaming_chat_completion(sentry_init, capture_events):
     sentry_init(
-        integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0
+        integrations=[OpenAIIntegration()],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
     )
     events = capture_events()
 
     client = OpenAI(api_key="z")
-    returned_chat = ChatCompletion(
-        id="chat-id",
-        choices=[
-            Choice(
-                index=0,
-                finish_reason="stop",
-                message=ChatCompletionMessage(role="assistant", content="response"),
-            )
-        ],
-        created=10000000,
-        model="model-id",
-        object="chat.completion",
-        usage=CompletionUsage(
-            completion_tokens=10,
-            prompt_tokens=20,
-            total_tokens=30,
-        ),
-    )
+    client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION)
 
-    client.chat.completions._post = mock.Mock(return_value=returned_chat)
     with start_transaction(name="openai tx"):
         response = (
             client.chat.completions.create(
@@ -53,17 +59,63 @@
             .message.content
         )
 
-    assert response == "response"
+    assert response == "the model response"
     tx = events[0]
     assert tx["type"] == "transaction"
     span = tx["spans"][0]
-    assert span["op"] == "openai.chat_completions.create"
+    assert span["op"] == "ai.chat_completions.create.openai"
+    assert "the model response" in span["data"]["ai.responses"][0]
     assert span["data"][COMPLETION_TOKENS_USED] == 10
     assert span["data"][PROMPT_TOKENS_USED] == 20
     assert span["data"][TOTAL_TOKENS_USED] == 30
 
 
+def test_stripped_pii_without_send_default_pii(sentry_init, capture_events):
+    sentry_init(
+        integrations=[OpenAIIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    client = OpenAI(api_key="z")
+    client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION)
+
+    with start_transaction(name="openai tx"):
+        client.chat.completions.create(
+            model="some-model", messages=[{"role": "system", "content": "hello"}]
+        )
+
+    tx = events[0]
+    assert tx["type"] == "transaction"
+    span = tx["spans"][0]
+    assert "ai.input_messages" not in span["data"]
+    assert "ai.responses" not in span["data"]
+
+
+def test_stripped_pii_without_send_prompts(sentry_init, capture_events):
+    sentry_init(
+        integrations=[OpenAIIntegration(include_prompts=False)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    client = OpenAI(api_key="z")
+    client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION)
+
+    with start_transaction(name="openai tx"):
+        client.chat.completions.create(
+            model="some-model", messages=[{"role": "system", "content": "hello"}]
+        )
+
+    tx = events[0]
+    assert tx["type"] == "transaction"
+    span = tx["spans"][0]
+    assert "ai.input_messages" not in span["data"]
+    assert "ai.responses" not in span["data"]
+
+
 # noinspection PyTypeChecker
 def test_streaming_chat_completion(sentry_init, capture_events):
     sentry_init(
@@ -121,7 +173,7 @@ def test_streaming_chat_completion(sentry_init, capture_events):
     tx = events[0]
     assert tx["type"] == "transaction"
     span = tx["spans"][0]
-    assert span["op"] == "openai.chat_completions.create"
+    assert span["op"] == "ai.chat_completions.create.openai"
 
     try:
         import tiktoken  # type: ignore # noqa # pylint: disable=unused-import
@@ -134,9 +186,7 @@ def test_streaming_chat_completion(sentry_init, capture_events):
 
 
 def test_bad_chat_completion(sentry_init, capture_events):
-    sentry_init(
-        integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0
-    )
+    sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0)
     events = capture_events()
 
     client = OpenAI(api_key="z")
@@ -153,9 +203,7 @@ def test_bad_chat_completion(sentry_init, capture_events):
 
 
 def test_embeddings_create(sentry_init, capture_events):
-    sentry_init(
-        integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0
-    )
+    sentry_init(integrations=[OpenAIIntegration()], traces_sample_rate=1.0)
     events = capture_events()
 
     client = OpenAI(api_key="z")
@@ -181,7 +229,7 @@ def test_embeddings_create(sentry_init, capture_events):
     tx = events[0]
     assert tx["type"] == "transaction"
     span = tx["spans"][0]
-    assert span["op"] == "openai.embeddings.create"
+    assert span["op"] == "ai.embeddings.create.openai"
     assert span["data"][PROMPT_TOKENS_USED] == 20
    assert span["data"][TOTAL_TOKENS_USED] == 30
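
A minimal usage sketch of the semantics this patch introduces (configuration values are illustrative, not part of the patch): prompt and response data is attached to OpenAI spans only when send_default_pii is enabled and include_prompts is left at its new default of True.

    import sentry_sdk
    from sentry_sdk.integrations.openai import OpenAIIntegration

    # With both flags enabled, spans carry "ai.input_messages" and "ai.responses".
    sentry_sdk.init(
        dsn="...",
        traces_sample_rate=1.0,
        send_default_pii=True,
        integrations=[OpenAIIntegration()],  # include_prompts now defaults to True
    )

    # Either of these strips prompt/response data from the spans again:
    #   sentry_sdk.init(..., send_default_pii=False)
    #   sentry_sdk.init(..., integrations=[OpenAIIntegration(include_prompts=False)])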